From f0c6227c06c63ce19f8e372e7db243cdf751bc88 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Wed, 13 May 2026 18:42:57 +0100
Subject: [PATCH 01/10] added config for devnet test run

Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
---
 docker/telemetry/validators-devnet.txt | 11 ++++
 docker/telemetry/xrpld-telemetry.cfg   | 89 +++++++++++++++++++++++---
 2 files changed, 90 insertions(+), 10 deletions(-)
 create mode 100644 docker/telemetry/validators-devnet.txt

diff --git a/docker/telemetry/validators-devnet.txt b/docker/telemetry/validators-devnet.txt
new file mode 100644
index 0000000000..644dac96f2
--- /dev/null
+++ b/docker/telemetry/validators-devnet.txt
@@ -0,0 +1,11 @@
+# Devnet validator list configuration.
+#
+# Uses the Devnet validator list publisher operated by Ripple.
+# This fetches the current set of trusted validators for the
+# XRP Ledger Devnet automatically.
+
+[validator_list_sites]
+https://vl.devnet.rippletest.net
+
+[validator_list_keys]
+EDBB54B0D9AEE071BB37784AF5A9E7CC49AC7A0EFCE868C54532BCB966B9CFC13B
diff --git a/docker/telemetry/xrpld-telemetry.cfg b/docker/telemetry/xrpld-telemetry.cfg
index 008a5f566a..1609c270fd 100644
--- a/docker/telemetry/xrpld-telemetry.cfg
+++ b/docker/telemetry/xrpld-telemetry.cfg
@@ -1,17 +1,25 @@
-# Standalone xrpld configuration with OpenTelemetry enabled.
+# xrpld configuration for Devnet with full OpenTelemetry tracing.
+#
+# Connects to the XRP Ledger Devnet and exercises ALL instrumented
+# workflows: RPC, transactions, consensus, peer overlay, ledger ops,
+# and pathfinding.
 #
 # Usage:
 #   1. Start the observability stack:
 #        docker compose -f docker/telemetry/docker-compose.yml up -d
-#   2. Run xrpld in standalone mode:
-#        ./xrpld --conf docker/telemetry/xrpld-telemetry.cfg -a --start
-#   3. Send RPC commands to exercise tracing:
+#   2. Run xrpld:
+#        ./xrpld --conf docker/telemetry/xrpld-telemetry.cfg
+#   3. Wait for sync (server_state=full), then exercise workflows:
 #        curl -s http://localhost:5005 -d '{"method":"server_info"}'
-#   4. View traces in Jaeger UI: http://localhost:16686
+#   4. View traces in Grafana Explore -> Tempo: http://localhost:3000
+
+# --- Server ports -----------------------------------------------------------
 
 [server]
 port_rpc_admin_local
 port_ws_admin_local
+port_ws_public
+port_peer
 
 [port_rpc_admin_local]
 port = 5005
@@ -25,29 +33,90 @@ ip = 127.0.0.1
 admin = 127.0.0.1
 protocol = ws
 
+[port_ws_public]
+port = 6005
+ip = 0.0.0.0
+protocol = ws
+
+[port_peer]
+port = 51235
+ip = 0.0.0.0
+protocol = peer
+
+# --- Network ----------------------------------------------------------------
+
+[network_id]
+devnet
+
+[ips]
+s.devnet.rippletest.net 51235
+
+[validators_file]
+docker/telemetry/validators-devnet.txt
+
+[peer_private]
+0
+
+[peers_max]
+21
+
+# --- Pathfinding (exercises ripple_path_find / path_find workflows) ---------
+
+[path_search]
+7
+
+[path_search_fast]
+2
+
+[path_search_max]
+10
+
+# --- Signing (allows sign/sign_for RPC for test tx submission) --------------
+
+[signing_support]
+true
+
+# --- Database ---------------------------------------------------------------
+
 [node_db]
 type=NuDB
 path=docker/telemetry/data/nudb
-online_delete=256
+online_delete=2000
 advisory_delete=0
 
 [database_path]
 docker/telemetry/data
 
+[ledger_history]
+1000
+
+# --- Logging ----------------------------------------------------------------
+
 [debug_logfile]
-docker/telemetry/data/debug.log
+/tmp/xrpld-integration/devnet/debug.log
 
 [rpc_startup]
 { "command": "log_level", "severity": "debug" }
 
+# --- SSL --------------------------------------------------------------------
+
 [ssl_verify]
 0
 
-# --- OpenTelemetry tracing ---
+# --- Insight (native OTel metrics via beast::insight) -----------------------
+
+[insight]
+server=otel
+endpoint=http://localhost:4318/v1/metrics
+prefix=rippled
+
+# --- OpenTelemetry tracing --------------------------------------------------
+
 [telemetry]
 enabled=1
-service_instance_id=xrpld-standalone
+service_instance_id=xrpld-devnet
 endpoint=http://localhost:4318/v1/traces
+metrics_endpoint=http://localhost:4318/v1/metrics
 exporter=otlp_http
 sampling_ratio=1.0
 batch_size=512
@@ -56,5 +125,5 @@ max_queue_size=2048
 trace_rpc=1
 trace_transactions=1
 trace_consensus=1
-trace_peer=0
+trace_peer=1
 trace_ledger=1

From 20477e549471bd8b8a978606c2b1e2e06a33eb0e Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Wed, 13 May 2026 18:49:21 +0100
Subject: [PATCH 02/10] validator path changes

Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
---
 docker/telemetry/xrpld-telemetry.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/telemetry/xrpld-telemetry.cfg b/docker/telemetry/xrpld-telemetry.cfg
index 1609c270fd..05764d32c0 100644
--- a/docker/telemetry/xrpld-telemetry.cfg
+++ b/docker/telemetry/xrpld-telemetry.cfg
@@ -52,7 +52,7 @@ devnet
 s.devnet.rippletest.net 51235
 
 [validators_file]
-docker/telemetry/validators-devnet.txt
+validators-devnet.txt
 
 [peer_private]
 0

From 02fe838257f10dff8ca075341d2288610a6b8542 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Wed, 13 May 2026 19:00:36 +0100
Subject: [PATCH 03/10] auto refresh at 5 seconds

Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
---
 .../grafana/dashboards/consensus-health.json  |  3 +-
 .../grafana/dashboards/ledger-operations.json |  3 +-
 .../grafana/dashboards/peer-network.json      |  3 +-
 .../dashboards/rippled-fee-market.json        |  3 +-
 .../grafana/dashboards/rippled-job-queue.json |  9 ++--
 .../dashboards/rippled-peer-quality.json      | 13 ++++--
 .../grafana/dashboards/rippled-rpc-perf.json  |  7 ++--
 .../dashboards/rippled-validator-health.json  | 13 ++++--
 .../grafana/dashboards/rpc-performance.json   |  5 ++-
 .../dashboards/system-ledger-data-sync.json   |  3 +-
 .../dashboards/system-network-traffic.json    |  3 +-
 .../dashboards/system-node-health.json        | 42 +++++++++++++++----
 .../system-overlay-traffic-detail.json        |  3 +-
 .../dashboards/system-rpc-pathfinding.json    |  3 +-
 .../dashboards/transaction-overview.json      |  5 ++-
 15 files changed, 85 insertions(+), 33 deletions(-)

diff --git a/docker/telemetry/grafana/dashboards/consensus-health.json b/docker/telemetry/grafana/dashboards/consensus-health.json
index d40f42cc58..f25d61aa8e 100644
--- a/docker/telemetry/grafana/dashboards/consensus-health.json
+++ b/docker/telemetry/grafana/dashboards/consensus-health.json
@@ -772,5 +772,6 @@
     "to": "now"
   },
   "title": "Consensus Health",
-  "uid": "rippled-consensus"
+  "uid": "rippled-consensus",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/ledger-operations.json b/docker/telemetry/grafana/dashboards/ledger-operations.json
index c9c8c5efc3..226299462c 100644
--- a/docker/telemetry/grafana/dashboards/ledger-operations.json
+++ b/docker/telemetry/grafana/dashboards/ledger-operations.json
@@ -349,5 +349,6 @@
     "to": "now"
   },
   "title": "Ledger Operations",
-  "uid": "rippled-ledger-ops"
+  "uid": "rippled-ledger-ops",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/peer-network.json b/docker/telemetry/grafana/dashboards/peer-network.json
index 0fd6e6048f..2a1b34617e 100644
--- a/docker/telemetry/grafana/dashboards/peer-network.json
+++ b/docker/telemetry/grafana/dashboards/peer-network.json
@@ -223,5 +223,6 @@
     "to": "now"
   },
   "title": "Peer Network",
-  "uid": "rippled-peer-net"
+  "uid": "rippled-peer-net",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/rippled-fee-market.json b/docker/telemetry/grafana/dashboards/rippled-fee-market.json
index 09e777835e..15efdfe672 100644
--- a/docker/telemetry/grafana/dashboards/rippled-fee-market.json
+++ b/docker/telemetry/grafana/dashboards/rippled-fee-market.json
@@ -339,5 +339,6 @@
   "timezone": "browser",
   "title": "Fee Market & TxQ",
   "uid": "rippled-fee-market",
-  "version": 1
+  "version": 1,
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/rippled-job-queue.json b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
index 7536fac767..dfc5cf490f 100644
--- a/docker/telemetry/grafana/dashboards/rippled-job-queue.json
+++ b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
@@ -214,7 +214,7 @@
             "drawStyle": "line",
             "lineWidth": 2,
             "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
             "spanNulls": true,
             "insertNulls": false,
             "showPoints": "auto",
@@ -273,7 +273,7 @@
             "drawStyle": "line",
             "lineWidth": 2,
             "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
             "spanNulls": true,
             "insertNulls": false,
             "showPoints": "auto",
@@ -323,7 +323,7 @@
             "drawStyle": "line",
             "lineWidth": 1,
             "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
             "spanNulls": true,
             "insertNulls": false,
             "showPoints": "auto",
@@ -391,5 +391,6 @@
   "timezone": "browser",
   "title": "Job Queue Analysis",
   "uid": "rippled-job-queue",
-  "version": 1
+  "version": 1,
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/rippled-peer-quality.json b/docker/telemetry/grafana/dashboards/rippled-peer-quality.json
index 9d5dfbb0b8..c3673d1d46 100644
--- a/docker/telemetry/grafana/dashboards/rippled-peer-quality.json
+++ b/docker/telemetry/grafana/dashboards/rippled-peer-quality.json
@@ -202,13 +202,19 @@
             {
               "type": "value",
               "options": {
-                "0": { "text": "No", "color": "green" }
+                "0": {
+                  "text": "No",
+                  "color": "green"
+                }
               }
             },
             {
               "type": "value",
               "options": {
-                "1": { "text": "Yes", "color": "red" }
+                "1": {
+                  "text": "Yes",
+                  "color": "red"
+                }
               }
             }
           ],
@@ -387,5 +393,6 @@
     "to": "now"
   },
   "title": "Peer Quality",
-  "uid": "rippled-peer-quality"
+  "uid": "rippled-peer-quality",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
index 2c6d8594fb..fbbb2ef438 100644
--- a/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
+++ b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
@@ -214,7 +214,7 @@
             "drawStyle": "line",
             "lineWidth": 2,
             "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
             "spanNulls": true,
             "insertNulls": false,
             "showPoints": "auto",
@@ -264,7 +264,7 @@
             "drawStyle": "line",
             "lineWidth": 1,
             "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
             "spanNulls": true,
             "insertNulls": false,
             "showPoints": "auto",
@@ -400,5 +400,6 @@
   "timezone": "browser",
   "title": "RPC Performance (OTel)",
   "uid": "rippled-rpc-perf",
-  "version": 1
+  "version": 1,
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/rippled-validator-health.json b/docker/telemetry/grafana/dashboards/rippled-validator-health.json
index 1a4d453bee..d5c6df4d18 100644
--- a/docker/telemetry/grafana/dashboards/rippled-validator-health.json
+++ b/docker/telemetry/grafana/dashboards/rippled-validator-health.json
@@ -381,13 +381,19 @@
             {
               "type": "value",
               "options": {
-                "0": { "text": "OK", "color": "green" }
+                "0": {
+                  "text": "OK",
+                  "color": "green"
+                }
               }
             },
             {
               "type": "value",
               "options": {
-                "1": { "text": "BLOCKED", "color": "red" }
+                "1": {
+                  "text": "BLOCKED",
+                  "color": "red"
+                }
               }
             }
           ],
@@ -710,5 +716,6 @@
     "to": "now"
   },
   "title": "Validator Health",
-  "uid": "rippled-validator-health"
+  "uid": "rippled-validator-health",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/rpc-performance.json b/docker/telemetry/grafana/dashboards/rpc-performance.json
index 7834ec4029..5eee79c985 100644
--- a/docker/telemetry/grafana/dashboards/rpc-performance.json
+++ b/docker/telemetry/grafana/dashboards/rpc-performance.json
@@ -328,7 +328,7 @@
       {
         "name": "node",
         "label": "Node",
-        "description": "Filter by rippled node (service.instance.id — e.g. Node-1)",
+        "description": "Filter by rippled node (service.instance.id \u2014 e.g. Node-1)",
         "type": "query",
         "query": "label_values(traces_span_metrics_calls_total, exported_instance)",
         "datasource": {
@@ -372,5 +372,6 @@
     "to": "now"
   },
   "title": "RPC Performance",
-  "uid": "rippled-rpc-perf"
+  "uid": "rippled-rpc-perf",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/system-ledger-data-sync.json b/docker/telemetry/grafana/dashboards/system-ledger-data-sync.json
index 67148abb63..5ad05aafb7 100644
--- a/docker/telemetry/grafana/dashboards/system-ledger-data-sync.json
+++ b/docker/telemetry/grafana/dashboards/system-ledger-data-sync.json
@@ -523,5 +523,6 @@
     "to": "now"
   },
   "title": "Ledger Data & Sync (System Metrics)",
-  "uid": "rippled-system-ledger-sync"
+  "uid": "rippled-system-ledger-sync",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/system-network-traffic.json b/docker/telemetry/grafana/dashboards/system-network-traffic.json
index 82faa28476..6d85596a3a 100644
--- a/docker/telemetry/grafana/dashboards/system-network-traffic.json
+++ b/docker/telemetry/grafana/dashboards/system-network-traffic.json
@@ -688,5 +688,6 @@
     "to": "now"
   },
   "title": "Network Traffic (System Metrics)",
-  "uid": "rippled-system-network"
+  "uid": "rippled-system-network",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/system-node-health.json b/docker/telemetry/grafana/dashboards/system-node-health.json
index fec7bec7cc..7827ef8bb9 100644
--- a/docker/telemetry/grafana/dashboards/system-node-health.json
+++ b/docker/telemetry/grafana/dashboards/system-node-health.json
@@ -162,7 +162,7 @@
       ],
       "fieldConfig": {
         "defaults": {
-          "unit": "µs",
+          "unit": "\u00b5s",
           "custom": {
             "axisLabel": "Duration",
             "spanNulls": true,
@@ -289,7 +289,7 @@
     },
     {
       "title": "Job Queue Depth",
-      "description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough — common during ledger replay or heavy RPC load.",
+      "description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough \u2014 common during ledger replay or heavy RPC load.",
       "type": "timeseries",
       "gridPos": {
         "h": 8,
@@ -764,23 +764,48 @@
           "mappings": [
             {
               "type": "value",
-              "options": { "0": { "text": "DISCONNECTED", "color": "red" } }
+              "options": {
+                "0": {
+                  "text": "DISCONNECTED",
+                  "color": "red"
+                }
+              }
             },
             {
               "type": "value",
-              "options": { "1": { "text": "CONNECTED", "color": "orange" } }
+              "options": {
+                "1": {
+                  "text": "CONNECTED",
+                  "color": "orange"
+                }
+              }
             },
             {
               "type": "value",
-              "options": { "2": { "text": "SYNCING", "color": "yellow" } }
+              "options": {
+                "2": {
+                  "text": "SYNCING",
+                  "color": "yellow"
+                }
+              }
             },
             {
               "type": "value",
-              "options": { "3": { "text": "TRACKING", "color": "blue" } }
+              "options": {
+                "3": {
+                  "text": "TRACKING",
+                  "color": "blue"
+                }
+              }
             },
             {
               "type": "value",
-              "options": { "4": { "text": "FULL", "color": "green" } }
+              "options": {
+                "4": {
+                  "text": "FULL",
+                  "color": "green"
+                }
+              }
             }
           ],
           "custom": {}
@@ -1397,5 +1422,6 @@
     "to": "now"
   },
   "title": "Node Health (System Metrics)",
-  "uid": "rippled-system-node-health"
+  "uid": "rippled-system-node-health",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/system-overlay-traffic-detail.json b/docker/telemetry/grafana/dashboards/system-overlay-traffic-detail.json
index 5ff2fbf4af..393339f7e3 100644
--- a/docker/telemetry/grafana/dashboards/system-overlay-traffic-detail.json
+++ b/docker/telemetry/grafana/dashboards/system-overlay-traffic-detail.json
@@ -583,5 +583,6 @@
     "to": "now"
   },
   "title": "Overlay Traffic Detail (System Metrics)",
-  "uid": "rippled-system-overlay-detail"
+  "uid": "rippled-system-overlay-detail",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/system-rpc-pathfinding.json b/docker/telemetry/grafana/dashboards/system-rpc-pathfinding.json
index 5e631747dc..61f16c72ad 100644
--- a/docker/telemetry/grafana/dashboards/system-rpc-pathfinding.json
+++ b/docker/telemetry/grafana/dashboards/system-rpc-pathfinding.json
@@ -413,5 +413,6 @@
     "to": "now"
   },
   "title": "RPC & Pathfinding (System Metrics)",
-  "uid": "rippled-system-rpc"
+  "uid": "rippled-system-rpc",
+  "refresh": "5s"
 }
diff --git a/docker/telemetry/grafana/dashboards/transaction-overview.json b/docker/telemetry/grafana/dashboards/transaction-overview.json
index 1d6a4c0dd0..3eb5f2687e 100644
--- a/docker/telemetry/grafana/dashboards/transaction-overview.json
+++ b/docker/telemetry/grafana/dashboards/transaction-overview.json
@@ -336,7 +336,7 @@
       {
         "name": "node",
         "label": "Node",
-        "description": "Filter by rippled node (service.instance.id — e.g. Node-1)",
+        "description": "Filter by rippled node (service.instance.id \u2014 e.g. Node-1)",
         "type": "query",
         "query": "label_values(traces_span_metrics_calls_total, exported_instance)",
         "datasource": {
@@ -380,5 +380,6 @@
     "to": "now"
   },
   "title": "Transaction Overview",
-  "uid": "rippled-transactions"
+  "uid": "rippled-transactions",
+  "refresh": "5s"
 }

From 93caaba5cacaec3a2218aaf40fca626737075e34 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 12:33:18 +0100
Subject: [PATCH 04/10] =?UTF-8?q?fix(telemetry):=20recover=20Phase=206=20d?=
 =?UTF-8?q?ashboard=20panels=20lost=20during=20statsd=E2=86=92system=20ren?=
 =?UTF-8?q?ame?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Panels 8-15 from statsd-node-health.json and panels 8-9 from
statsd-network-traffic.json were lost when Phase 7 renamed these files
to system-*. The merge (5cd71ed107) took Phase 7's smaller version
without the extra panels added by commit b933e8ae00 on Phase 6.

Recovered panels (system-node-health.json):
- Key Jobs Execution Time (11 job types)
- Key Jobs Dequeue Wait Time (11 job types)
- FullBelowCache Size
- FullBelowCache Hit Rate
- Ledger Publish Gap (validated - published age delta)
- State Duration Rate (Full vs Tracking)
- All Jobs Execution Time Detail (34 job types)
- All Jobs Dequeue Wait Detail (34 job types)

Recovered panels (system-network-traffic.json):
- Duplicate Traffic (Wasted Bandwidth)
- All Traffic Categories Detail (topk 15 by byte rate)

All recovered panels updated to include exported_instance=~"$node"
filter per project dashboard guidelines.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../dashboards/system-network-traffic.json    | 113 ++++
 .../dashboards/system-node-health.json        | 495 +++++++++++++++++-
 2 files changed, 606 insertions(+), 2 deletions(-)

diff --git a/docker/telemetry/grafana/dashboards/system-network-traffic.json b/docker/telemetry/grafana/dashboards/system-network-traffic.json
index 82faa28476..7cc2dfd1ea 100644
--- a/docker/telemetry/grafana/dashboards/system-network-traffic.json
+++ b/docker/telemetry/grafana/dashboards/system-network-traffic.json
@@ -655,6 +655,119 @@
           }
         ]
       }
+    },
+    {
+      "title": "Duplicate Traffic (Wasted Bandwidth)",
+      "description": "Rate of duplicate overlay traffic across transaction, proposal, and validation categories. Duplicate messages are messages the node has already seen and discards. High duplicate rates indicate inefficient message routing or network topology issues causing redundant relays.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 32
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_transactions_duplicate_Bytes_In{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "TX Duplicate In"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_transactions_duplicate_Bytes_Out{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "TX Duplicate Out"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_proposals_duplicate_Bytes_In{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Proposals Duplicate In"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_proposals_duplicate_Bytes_Out{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Proposals Duplicate Out"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_validations_duplicate_Bytes_In{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Validations Duplicate In"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_validations_duplicate_Bytes_Out{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Validations Duplicate Out"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps",
+          "custom": {
+            "axisLabel": "Throughput",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "All Traffic Categories (Detail)",
+      "description": "Top 15 traffic categories by inbound byte rate, excluding the total aggregate. Provides a detailed timeseries view of which overlay message types are consuming the most bandwidth over time. Complements the bar gauge snapshot view in the Overlay Traffic panel.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 32
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(15, rate({__name__=~\"rippled_.*_Bytes_In\", __name__!~\"rippled_total_.*\", exported_instance=~\"$node\"}[5m]))",
+          "legendFormat": "{{__name__}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps",
+          "custom": {
+            "axisLabel": "Throughput",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
     }
   ],
   "schemaVersion": 39,
diff --git a/docker/telemetry/grafana/dashboards/system-node-health.json b/docker/telemetry/grafana/dashboards/system-node-health.json
index 456c62b2e1..96775edb7e 100644
--- a/docker/telemetry/grafana/dashboards/system-node-health.json
+++ b/docker/telemetry/grafana/dashboards/system-node-health.json
@@ -160,7 +160,7 @@
       ],
       "fieldConfig": {
         "defaults": {
-          "unit": "µs",
+          "unit": "\u00b5s",
           "custom": {
             "axisLabel": "Duration",
             "spanNulls": true,
@@ -287,7 +287,7 @@
     },
     {
       "title": "Job Queue Depth",
-      "description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough — common during ledger replay or heavy RPC load.",
+      "description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough \u2014 common during ledger replay or heavy RPC load.",
       "type": "timeseries",
       "gridPos": {
         "h": 8,
@@ -399,6 +399,497 @@
         },
         "overrides": []
       }
+    },
+    {
+      "title": "--- Extended Metrics (Recovered from Phase 6) ---",
+      "type": "row",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 32
+      },
+      "collapsed": false,
+      "panels": []
+    },
+    {
+      "title": "Key Jobs Execution Time",
+      "description": "Execution time for critical job types at the selected quantile. Sourced from per-job-type events in JobTypeData (JobTypeData.h:48). Shows how long key consensus, transaction, and maintenance jobs take to execute. Spikes indicate processing bottlenecks.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 33
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_acceptLedger{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Accept Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_advanceLedger{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Advance Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_transaction{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Transaction [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_writeObjects{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Write Objects [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_heartbeat{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Heartbeat [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_sweep{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Sweep [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedValidation{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Validation [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedProposal{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Proposal [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_publishNewLedger{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Publish New Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_clientRPC{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Client RPC [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_ledgerData{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Ledger Data [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Duration (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Key Jobs Dequeue Wait Time",
+      "description": "Time spent waiting in the job queue before execution for critical job types. Sourced from per-job-type dequeue events (JobTypeData.h:47). High dequeue times indicate the job queue is backlogged and jobs are waiting too long to be scheduled.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 33
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_acceptLedger_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Accept Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_advanceLedger_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Advance Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_transaction_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Transaction [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_writeObjects_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Write Objects [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_heartbeat_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Heartbeat [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_sweep_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Sweep [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedValidation_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Validation [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedProposal_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Proposal [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_publishNewLedger_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Publish New Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_clientRPC_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Client RPC [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_ledgerData_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Ledger Data [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Wait Time (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "FullBelowCache Size",
+      "description": "Number of entries in the FullBelowCache. Sourced from the TaggedCache size gauge (TaggedCache.h:183) for the Node family full below cache (NodeFamily.cpp:29). This cache tracks which SHAMap nodes have all children present locally, avoiding redundant fetches during ledger acquisition.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_Node_family_full_below_cache_size{exported_instance=~\"$node\"}",
+          "legendFormat": "FullBelowCache Size"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short",
+          "custom": {
+            "axisLabel": "Entries",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "FullBelowCache Hit Rate",
+      "description": "Hit rate percentage for the FullBelowCache. Sourced from the TaggedCache hit_rate gauge (TaggedCache.h:184). A high hit rate means the node is efficiently reusing cached knowledge about complete SHAMap subtrees. Low hit rates during steady state warrant investigation.",
+      "type": "gauge",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_Node_family_full_below_cache_hit_rate{exported_instance=~\"$node\"}",
+          "legendFormat": "Hit Rate"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent",
+          "min": 0,
+          "max": 100,
+          "thresholds": {
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 25
+              },
+              {
+                "color": "green",
+                "value": 50
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Ledger Publish Gap",
+      "description": "Difference between published and validated ledger ages. Computed as Published_Ledger_Age minus Validated_Ledger_Age. A value near zero means the publish pipeline keeps up with validation. A growing gap indicates the publish pipeline is falling behind, potentially causing stale data for subscribers.",
+      "type": "stat",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 49
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_LedgerMaster_Published_Ledger_Age{exported_instance=~\"$node\"} - rippled_LedgerMaster_Validated_Ledger_Age{exported_instance=~\"$node\"}",
+          "legendFormat": "Publish Gap"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 5
+              },
+              {
+                "color": "red",
+                "value": 10
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "State Duration Rate (Full vs Tracking)",
+      "description": "Rate of change of time spent in Full and Tracking operating modes, normalized to seconds. Sourced from State_Accounting duration gauges (NetworkOPs.cpp:774-778). In steady state the Full duration rate should be close to 1.0 (gaining one second of Full-mode time per wall-clock second). A drop below 1.0 means the node is spending time in other modes.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 49
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_State_Accounting_Full_duration{exported_instance=~\"$node\"}[5m]) / 1000000",
+          "legendFormat": "Full Mode Rate"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_State_Accounting_Tracking_duration{exported_instance=~\"$node\"}[5m]) / 1000000",
+          "legendFormat": "Tracking Mode Rate"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short",
+          "custom": {
+            "axisLabel": "Rate (s/s)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "All Jobs Execution Time (Detail)",
+      "description": "Execution time for ALL non-special job types at the selected quantile. Shows the complete picture of job execution performance. Use the Key Jobs panel for a focused view of the most critical jobs.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 57
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "{__name__=~\"rippled_(makeFetchPack|publishAcqLedger|untrustedValidation|manifest|localTransaction|ledgerReplayRequest|ledgerRequest|untrustedProposal|ledgerReplayTask|ledgerData|clientCommand|clientSubscribe|clientFeeChange|clientConsensus|clientAccountHistory|clientRPC|clientWebsocket|RPC|updatePaths|transaction|batch|advanceLedger|publishNewLedger|fetchTxnData|writeAhead|trustedValidation|writeObjects|acceptLedger|trustedProposal|sweep|clusterReport|heartbeat|administration|handleHaveTransactions|doTransactions)\", quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "{{__name__}} [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Duration (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "All Jobs Dequeue Wait (Detail)",
+      "description": "Dequeue wait time for ALL non-special job types at the selected quantile. Shows the complete picture of job queue waiting times. High wait times across many job types indicate systemic job queue congestion.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 65
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "{__name__=~\"rippled_(makeFetchPack_q|publishAcqLedger_q|untrustedValidation_q|manifest_q|localTransaction_q|ledgerReplayRequest_q|ledgerRequest_q|untrustedProposal_q|ledgerReplayTask_q|ledgerData_q|clientCommand_q|clientSubscribe_q|clientFeeChange_q|clientConsensus_q|clientAccountHistory_q|clientRPC_q|clientWebsocket_q|RPC_q|updatePaths_q|transaction_q|batch_q|advanceLedger_q|publishNewLedger_q|fetchTxnData_q|writeAhead_q|trustedValidation_q|writeObjects_q|acceptLedger_q|trustedProposal_q|sweep_q|clusterReport_q|heartbeat_q|administration_q|handleHaveTransactions_q|doTransactions_q)\", quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "{{__name__}} [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Wait Time (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
     }
   ],
   "schemaVersion": 39,

From 7d61a4a0ef95ca1cf6628d4d2b7fba34ee41b73a Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 13:32:55 +0100
Subject: [PATCH 05/10] feat(telemetry): add missing Phase 9 metric panels to
 dashboards

13 metrics from 09-data-collection-reference.md were not displayed on
any Grafana dashboard. Adds panels for all of them:

system-node-health.json (+7 panels):
- NodeStore Bytes Read/Written (node_written_bytes, node_read_bytes)
- NodeStore Read Threads & Duration (node_reads_duration_us,
  read_request_bundle, read_threads_running, read_threads_total)
- AL_size added to Cache Sizes panel
- Current Ledger Index (ledger_current_index)
- NuDB Storage Size (storage_detail{metric="nudb_bytes"})

rippled-validator-health.json (+2 panels):
- UNL Blocked (validator_health{metric="unl_blocked"})
- Agreement/Missed Counters Rate (validation_agreements_total,
  validation_missed_total)

rippled-job-queue.json (+1 panel):
- Transaction Overflow Rate (jq_trans_overflow_total)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../grafana/dashboards/rippled-job-queue.json |  61 +++++
 .../dashboards/rippled-validator-health.json  | 145 ++++++++++-
 .../dashboards/system-node-health.json        | 239 ++++++++++++++++--
 3 files changed, 415 insertions(+), 30 deletions(-)

diff --git a/docker/telemetry/grafana/dashboards/rippled-job-queue.json b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
index dfc5cf490f..6888b240a5 100644
--- a/docker/telemetry/grafana/dashboards/rippled-job-queue.json
+++ b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
@@ -335,6 +335,67 @@
         },
         "overrides": []
       }
+    },
+    {
+      "title": "Transaction Overflow Rate",
+      "description": "Rate of job queue transaction overflows per minute. Overflows occur when the job queue's transaction limit is exceeded, causing transactions to be dropped. Non-zero values indicate the node is under heavy transaction load.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 32
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(xrpld_jq_trans_overflow_total{exported_instance=~\"$node\"}[5m]) * 60",
+          "legendFormat": "Overflows/min [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 1
+              },
+              {
+                "color": "red",
+                "value": 10
+              }
+            ]
+          },
+          "custom": {
+            "axisLabel": "Overflows / Min",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
     }
   ],
   "schemaVersion": 39,
diff --git a/docker/telemetry/grafana/dashboards/rippled-validator-health.json b/docker/telemetry/grafana/dashboards/rippled-validator-health.json
index d5c6df4d18..70a5741aaa 100644
--- a/docker/telemetry/grafana/dashboards/rippled-validator-health.json
+++ b/docker/telemetry/grafana/dashboards/rippled-validator-health.json
@@ -463,6 +463,139 @@
         "overrides": []
       }
     },
+    {
+      "title": "UNL Blocked",
+      "description": "Whether the node's UNL (Unique Node List) is blocked (1=blocked, 0=normal). A UNL-blocked node cannot determine validator trust and may stop participating in consensus.",
+      "type": "stat",
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 0,
+        "y": 18
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_validator_health{metric=\"unl_blocked\",exported_instance=~\"$node\"}",
+          "legendFormat": "UNL Blocked [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "mappings": [
+            {
+              "type": "value",
+              "options": {
+                "0": {
+                  "text": "OK",
+                  "color": "green"
+                }
+              }
+            },
+            {
+              "type": "value",
+              "options": {
+                "1": {
+                  "text": "BLOCKED",
+                  "color": "red"
+                }
+              }
+            }
+          ],
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          },
+          "custom": {}
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Agreement/Missed Counters (Rate)",
+      "description": "Rate of cumulative validation agreements and misses per minute. These monotonic counters complement the rolling window percentages above.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 18,
+        "x": 6,
+        "y": 18
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(xrpld_validation_agreements_total{exported_instance=~\"$node\"}[5m]) * 60",
+          "legendFormat": "Agreements/min [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(xrpld_validation_missed_total{exported_instance=~\"$node\"}[5m]) * 60",
+          "legendFormat": "Missed/min [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Per Minute",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byRegexp",
+              "options": "Missed.*"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "mode": "fixed",
+                  "fixedColor": "red"
+                }
+              }
+            ]
+          }
+        ]
+      }
+    },
     {
       "title": "--- Server State & Consensus ---",
       "type": "row",
@@ -470,7 +603,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 18
+        "y": 26
       },
       "collapsed": false,
       "panels": []
@@ -483,7 +616,7 @@
         "h": 8,
         "w": 6,
         "x": 0,
-        "y": 19
+        "y": 27
       },
       "options": {
         "tooltip": {
@@ -516,7 +649,7 @@
         "h": 8,
         "w": 18,
         "x": 6,
-        "y": 19
+        "y": 27
       },
       "options": {
         "tooltip": {
@@ -561,7 +694,7 @@
         "h": 8,
         "w": 8,
         "x": 0,
-        "y": 27
+        "y": 35
       },
       "options": {
         "tooltip": {
@@ -594,7 +727,7 @@
         "h": 8,
         "w": 8,
         "x": 8,
-        "y": 27
+        "y": 35
       },
       "options": {
         "tooltip": {
@@ -643,7 +776,7 @@
         "h": 8,
         "w": 8,
         "x": 16,
-        "y": 27
+        "y": 35
       },
       "options": {
         "tooltip": {
diff --git a/docker/telemetry/grafana/dashboards/system-node-health.json b/docker/telemetry/grafana/dashboards/system-node-health.json
index 7827ef8bb9..14a655cd2f 100644
--- a/docker/telemetry/grafana/dashboards/system-node-health.json
+++ b/docker/telemetry/grafana/dashboards/system-node-health.json
@@ -535,6 +535,116 @@
         "overrides": []
       }
     },
+    {
+      "title": "NodeStore Bytes Read/Written",
+      "description": "Cumulative bytes read and written by the NodeStore backend. Sourced from MetricsRegistry nodestore_state observable gauge with metric=node_written_bytes, node_read_bytes.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_read_bytes\"}",
+          "legendFormat": "Bytes Read [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_written_bytes\"}",
+          "legendFormat": "Bytes Written [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "decbytes",
+          "custom": {
+            "axisLabel": "Bytes",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "NodeStore Read Threads & Duration",
+      "description": "Read thread utilization and cumulative read duration. read_threads_running/read_threads_total shows thread saturation. node_reads_duration_us tracks cumulative time spent in read I/O. read_request_bundle tracks bundled read operations.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_threads_running\"}",
+          "legendFormat": "Read Threads Running [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_threads_total\"}",
+          "legendFormat": "Read Threads Total [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_request_bundle\"}",
+          "legendFormat": "Read Request Bundle [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_duration_us\"}",
+          "legendFormat": "Read Duration (\u00b5s) [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Count / \u00b5s",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
     {
       "title": "--- OTel: Cache Hit Rates ---",
       "type": "row",
@@ -542,7 +652,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 41
+        "y": 49
       },
       "collapsed": false,
       "panels": []
@@ -555,7 +665,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 42
+        "y": 50
       },
       "options": {
         "tooltip": {
@@ -610,13 +720,13 @@
     },
     {
       "title": "Cache Sizes",
-      "description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge.",
+      "description": "TreeNode cache size, TreeNode track size, FullBelow cache size, and AcceptedLedger (AL) cache size. Sourced from MetricsRegistry cache_metrics observable gauge.",
       "type": "timeseries",
       "gridPos": {
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 42
+        "y": 50
       },
       "options": {
         "tooltip": {
@@ -645,6 +755,13 @@
           },
           "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}",
           "legendFormat": "FullBelow [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"AL_size\"}",
+          "legendFormat": "AcceptedLedger Size [{{exported_instance}}]"
         }
       ],
       "fieldConfig": {
@@ -670,7 +787,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 50
+        "y": 58
       },
       "collapsed": false,
       "panels": []
@@ -683,7 +800,7 @@
         "h": 8,
         "w": 24,
         "x": 0,
-        "y": 51
+        "y": 59
       },
       "options": {
         "tooltip": {
@@ -728,7 +845,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 59
+        "y": 67
       },
       "collapsed": false,
       "panels": []
@@ -741,7 +858,7 @@
         "h": 8,
         "w": 6,
         "x": 0,
-        "y": 60
+        "y": 68
       },
       "options": {
         "tooltip": {
@@ -821,7 +938,7 @@
         "h": 8,
         "w": 6,
         "x": 6,
-        "y": 60
+        "y": 68
       },
       "options": {
         "tooltip": {
@@ -854,7 +971,7 @@
         "h": 8,
         "w": 6,
         "x": 12,
-        "y": 60
+        "y": 68
       },
       "options": {
         "tooltip": {
@@ -887,7 +1004,7 @@
         "h": 8,
         "w": 6,
         "x": 18,
-        "y": 60
+        "y": 68
       },
       "options": {
         "tooltip": {
@@ -920,7 +1037,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 68
+        "y": 76
       },
       "options": {
         "tooltip": {
@@ -967,7 +1084,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 68
+        "y": 76
       },
       "options": {
         "tooltip": {
@@ -993,6 +1110,80 @@
         "overrides": []
       }
     },
+    {
+      "title": "Current Ledger Index",
+      "description": "Current open ledger sequence number. The gap between this and validated_ledger_seq represents ledgers in flight.",
+      "type": "stat",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 84
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"ledger_current_index\"}",
+          "legendFormat": "Current Ledger [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {}
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "NuDB Storage Size",
+      "description": "NuDB backend file size in bytes. Sourced from MetricsRegistry storage_detail observable gauge. Tracks database growth over time.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 84
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_storage_detail{exported_instance=~\"$node\", metric=\"nudb_bytes\"}",
+          "legendFormat": "NuDB Size [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "decbytes",
+          "custom": {
+            "axisLabel": "Size",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
     {
       "title": "--- OTel: Complete Ledgers & DB ---",
       "type": "row",
@@ -1000,7 +1191,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 76
+        "y": 92
       },
       "collapsed": false,
       "panels": []
@@ -1013,7 +1204,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 77
+        "y": 93
       },
       "options": {
         "showHeader": true
@@ -1045,7 +1236,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 77
+        "y": 93
       },
       "options": {
         "tooltip": {
@@ -1100,7 +1291,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 85
+        "y": 101
       },
       "options": {
         "tooltip": {
@@ -1133,7 +1324,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 85
+        "y": 101
       },
       "options": {
         "tooltip": {
@@ -1172,7 +1363,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 93
+        "y": 109
       },
       "collapsed": false,
       "panels": []
@@ -1185,7 +1376,7 @@
         "h": 8,
         "w": 6,
         "x": 0,
-        "y": 94
+        "y": 110
       },
       "options": {
         "tooltip": {
@@ -1218,7 +1409,7 @@
         "h": 8,
         "w": 6,
         "x": 6,
-        "y": 94
+        "y": 110
       },
       "options": {
         "tooltip": {
@@ -1251,7 +1442,7 @@
         "h": 8,
         "w": 6,
         "x": 12,
-        "y": 94
+        "y": 110
       },
       "options": {
         "tooltip": {
@@ -1284,7 +1475,7 @@
         "h": 8,
         "w": 6,
         "x": 18,
-        "y": 94
+        "y": 110
       },
       "options": {
         "tooltip": {
@@ -1333,7 +1524,7 @@
         "h": 8,
         "w": 24,
         "x": 0,
-        "y": 102
+        "y": 118
       },
       "options": {
         "tooltip": {

From 6f403fdd1b9fb936bf0703fe75ce7149effb5a96 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 13:58:13 +0100
Subject: [PATCH 06/10] fix(telemetry): align Tempo search tags with C++ span
 attribute names

RPC span attributes use bare names (command, rpc_status, rpc_role) per
the naming convention in SpanNames.h, not xrpl.rpc.* qualified names.
Node health attributes (amendment_blocked, server_state) are resource
attributes set at Tracer init, not span attributes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../grafana/provisioning/datasources/tempo.yaml      | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
index 198c2550d3..7f4bd3684f 100644
--- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
@@ -92,28 +92,28 @@ datasources:
             type: static
           # Phase 2: RPC tracing filters
           - id: rpc-command
-            tag: xrpl.rpc.command
+            tag: command
             operator: "="
             scope: span
             type: static
           - id: rpc-status
-            tag: xrpl.rpc.status
+            tag: rpc_status
             operator: "="
             scope: span
             type: dynamic
           - id: rpc-role
-            tag: xrpl.rpc.role
+            tag: rpc_role
             operator: "="
             scope: span
             type: dynamic
-          # Phase 2: Node health filters (Task 2.8)
+          # Phase 2: Node health filters (Task 2.8) — resource attributes
           - id: node-amendment-blocked
             tag: xrpl.node.amendment_blocked
             operator: "="
-            scope: span
+            scope: resource
             type: static
           - id: node-server-state
             tag: xrpl.node.server_state
             operator: "="
-            scope: span
+            scope: resource
             type: dynamic

From b3920355444269410a800a513b13b7fab141f6e7 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 13:58:31 +0100
Subject: [PATCH 07/10] fix(telemetry): align Tempo TX search tags with C++
 attribute names

Transaction span attributes use bare names (local, tx_status) per
SpanNames.h convention, not xrpl.tx.* qualified names. xrpl.tx.hash
is correct (shared canonical attr defined in SpanNames.h).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docker/telemetry/grafana/provisioning/datasources/tempo.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
index 7f1265bf28..542d7ddb7d 100644
--- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
@@ -125,12 +125,12 @@ datasources:
             scope: span
             type: static
           - id: tx-origin
-            tag: xrpl.tx.local
+            tag: local
             operator: "="
             scope: span
             type: dynamic
           - id: tx-status
-            tag: xrpl.tx.status
+            tag: tx_status
             operator: "="
             scope: span
             type: dynamic

From 61ab5c6fe3c9cb64e9dce1b6ede8805160185a19 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 13:59:08 +0100
Subject: [PATCH 08/10] fix(telemetry): align Tempo consensus search tags with
 C++ attribute names

Consensus span attributes use bare names (close_time_correct,
consensus_state, close_resolution_ms) and shared canonical attrs
(xrpl.ledger.seq) per SpanNames.h. xrpl.consensus.mode and
xrpl.consensus.round are correct (domain-qualified to avoid collision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../telemetry/grafana/provisioning/datasources/tempo.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
index 0e45ae1c87..03b232c000 100644
--- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
@@ -147,22 +147,22 @@ datasources:
             scope: span
             type: dynamic
           - id: consensus-ledger-seq
-            tag: xrpl.consensus.ledger.seq
+            tag: xrpl.ledger.seq
             operator: "="
             scope: span
             type: static
           - id: consensus-close-time-correct
-            tag: xrpl.consensus.close_time_correct
+            tag: close_time_correct
             operator: "="
             scope: span
             type: dynamic
           - id: consensus-state
-            tag: xrpl.consensus.state
+            tag: consensus_state
             operator: "="
             scope: span
             type: dynamic
           - id: consensus-close-resolution
-            tag: xrpl.consensus.close_resolution_ms
+            tag: close_resolution_ms
             operator: "="
             scope: span
             type: dynamic

From b449db0434f6b212be9c133c804d345981e7c26a Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 14:01:12 +0100
Subject: [PATCH 09/10] fix(telemetry): align spanmetrics dimensions, Tempo
 tags, and dashboard queries with C++ attribute names

Spanmetrics dimensions used xrpl.rpc.command etc., but C++ emits bare
"command". Tempo tags for the phase6-added consensus/tx/peer/txq
filters used qualified names, but C++ uses bare names. The dashboard
panel referenced xrpl_tx_suppressed (never populated) instead of
suppressed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../dashboards/transaction-overview.json       |  4 ++--
 .../provisioning/datasources/tempo.yaml        | 18 +++++++++---------
 docker/telemetry/otel-collector-config.yaml    | 14 +++++++-------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/docker/telemetry/grafana/dashboards/transaction-overview.json b/docker/telemetry/grafana/dashboards/transaction-overview.json
index 1d6a4c0dd0..edcb4d872e 100644
--- a/docker/telemetry/grafana/dashboards/transaction-overview.json
+++ b/docker/telemetry/grafana/dashboards/transaction-overview.json
@@ -147,8 +147,8 @@
           "datasource": {
             "type": "prometheus"
           },
-          "expr": "sum by (xrpl_tx_suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", exported_instance=~\"$node\"}[$__rate_interval]))",
-          "legendFormat": "Suppressed={{xrpl_tx_suppressed}} [{{exported_instance}}]"
+          "expr": "sum by (suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", exported_instance=~\"$node\"}[$__rate_interval]))",
+          "legendFormat": "Suppressed={{suppressed}} [{{exported_instance}}]"
         }
       ],
       "fieldConfig": {
diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
index 343d863928..dac00303a7 100644
--- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
@@ -167,22 +167,22 @@ datasources:
             scope: span
             type: dynamic
           - id: consensus-proposers
-            tag: xrpl.consensus.proposers
+            tag: proposers
             operator: "="
             scope: span
             type: dynamic
           - id: consensus-result
-            tag: xrpl.consensus.result
+            tag: consensus_result
             operator: "="
             scope: span
             type: dynamic
           - id: consensus-mode-old
-            tag: xrpl.consensus.mode.old
+            tag: mode_old
             operator: "="
             scope: span
             type: dynamic
           - id: consensus-mode-new
-            tag: xrpl.consensus.mode.new
+            tag: mode_new
             operator: "="
             scope: span
             type: dynamic
@@ -193,27 +193,27 @@ datasources:
             type: static
           # Phase 3/4: Additional transaction and queue filters
           - id: tx-path
-            tag: xrpl.tx.path
+            tag: path
             operator: "="
             scope: span
             type: dynamic
           - id: tx-suppressed
-            tag: xrpl.tx.suppressed
+            tag: suppressed
             operator: "="
             scope: span
             type: dynamic
           - id: peer-version
-            tag: xrpl.peer.version
+            tag: peer_version
             operator: "="
             scope: span
             type: dynamic
           - id: txq-status
-            tag: xrpl.txq.status
+            tag: txq_status
             operator: "="
             scope: span
             type: dynamic
           - id: txq-ter-code
-            tag: xrpl.txq.ter_code
+            tag: ter_code
             operator: "="
             scope: span
             type: dynamic
diff --git a/docker/telemetry/otel-collector-config.yaml b/docker/telemetry/otel-collector-config.yaml
index bfe782ffd5..1c7b184772 100644
--- a/docker/telemetry/otel-collector-config.yaml
+++ b/docker/telemetry/otel-collector-config.yaml
@@ -49,14 +49,14 @@ connectors:
       explicit:
         buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 5s]
     dimensions:
-      - name: xrpl.rpc.command
-      - name: xrpl.rpc.status
+      - name: command
+      - name: rpc_status
       - name: xrpl.consensus.mode
-      - name: xrpl.consensus.close_time_correct
-      - name: xrpl.tx.local
-      - name: xrpl.tx.suppressed
-      - name: xrpl.peer.proposal.trusted
-      - name: xrpl.peer.validation.trusted
+      - name: close_time_correct
+      - name: local
+      - name: suppressed
+      - name: proposal_trusted
+      - name: validation_trusted
 
 exporters:
   debug:

From 7ada57e2a84651d62b23ac86d3b13a12b3ad2e03 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Thu, 14 May 2026 15:53:59 +0100
Subject: [PATCH 10/10] fix(telemetry): map TraceCategory to OTel SpanKind in
 SpanGuard::span()

SpanGuard::span() hardcoded SpanKind::kInternal for every span. Tempo's
service-graph and spanmetrics RED calculations rely on kServer /
kConsumer / kClient / kProducer to classify inbound vs outbound vs
internal operations. With kInternal everywhere, the service graph
collapses to a single self-loop and RED metrics attribute all latency
to internal work.

Add categoryToSpanKind() mapping:
  - Rpc           -> kServer   (inbound synchronous request)
  - Peer          -> kConsumer (inbound async peer message)
  - Transactions  -> kInternal
  - Consensus     -> kInternal
  - Ledger        -> kInternal

Only SpanGuard::span(cat, prefix, name) is affected; childSpan /
linkedSpan continue to default to kInternal because they represent
in-process continuations of an already-kinded parent.
---
 src/libxrpl/telemetry/SpanGuard.cpp | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/libxrpl/telemetry/SpanGuard.cpp b/src/libxrpl/telemetry/SpanGuard.cpp
index 22c210c7c5..11eda56ed6 100644
--- a/src/libxrpl/telemetry/SpanGuard.cpp
+++ b/src/libxrpl/telemetry/SpanGuard.cpp
@@ -132,6 +132,32 @@ isCategoryEnabled(Telemetry const& tel, TraceCategory cat)
     return false;  // unreachable, silences compiler warning
 }
 
+namespace {
+
+// Translates a TraceCategory into the OTel SpanKind handed to startSpan()
+// so Tempo's service-graph / RED metrics see the correct direction.
+// Rpc -> kServer (spans emitted at the server entry point, handler
+// dispatch); Peer -> kConsumer (spans emitted at inbound-message receipt);
+// Transactions / Consensus / Ledger are internal processing -> kInternal.
+otel_trace::SpanKind
+categoryToSpanKind(TraceCategory cat)
+{
+    switch (cat)
+    {
+        case TraceCategory::Rpc:
+            return otel_trace::SpanKind::kServer;
+        case TraceCategory::Peer:
+            return otel_trace::SpanKind::kConsumer;
+        case TraceCategory::Transactions:
+        case TraceCategory::Consensus:
+        case TraceCategory::Ledger:
+            return otel_trace::SpanKind::kInternal;
+    }
+    return otel_trace::SpanKind::kInternal;  // unreachable, silences compiler warning
+}
+
+}  // namespace
+
 SpanGuard
 SpanGuard::span(TraceCategory cat, std::string_view prefix, std::string_view name)
 {
@@ -139,7 +165,7 @@ SpanGuard::span(TraceCategory cat, std::string_view prefix, std::string_view nam
     if (!tel || !tel->isEnabled() || !isCategoryEnabled(*tel, cat))
         return {};
     auto fullName = std::string(prefix) + "." + std::string(name);
-    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName)));
+    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, categoryToSpanKind(cat))));
 }
 
 // ===== Child / linked span creation ========================================