merge: pratik/otel-phase9-metric-gap-fill fix(SpanKind) into pratik/otel-phase10-workload-validation

# Conflicts: # docker/telemetry/otel-collector-config.yaml # docker/telemetry/xrpld-telemetry.cfg
2026-07-23 23:20:33 +00:00 · 2026-05-14 15:59:39 +01:00
parent a9e4006591 9d99ce6ae8
commit 34bf61ff77
20 changed files with 1241 additions and 94 deletions
--- a/docker/telemetry/grafana/dashboards/consensus-health.json
+++ b/docker/telemetry/grafana/dashboards/consensus-health.json
@@ -772,5 +772,6 @@
    "to": "now"
  },
  "title": "Consensus Health",
-  "uid": "rippled-consensus"
+  "uid": "rippled-consensus",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/ledger-operations.json
+++ b/docker/telemetry/grafana/dashboards/ledger-operations.json
@@ -349,5 +349,6 @@
    "to": "now"
  },
  "title": "Ledger Operations",
-  "uid": "rippled-ledger-ops"
+  "uid": "rippled-ledger-ops",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/peer-network.json
+++ b/docker/telemetry/grafana/dashboards/peer-network.json
@@ -223,5 +223,6 @@
    "to": "now"
  },
  "title": "Peer Network",
-  "uid": "rippled-peer-net"
+  "uid": "rippled-peer-net",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/rippled-fee-market.json
+++ b/docker/telemetry/grafana/dashboards/rippled-fee-market.json
@@ -339,5 +339,6 @@
  "timezone": "browser",
  "title": "Fee Market & TxQ",
  "uid": "rippled-fee-market",
-  "version": 1
+  "version": 1,
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/rippled-job-queue.json
+++ b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
@@ -214,7 +214,7 @@
            "drawStyle": "line",
            "lineWidth": 2,
            "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
@@ -273,7 +273,7 @@
            "drawStyle": "line",
            "lineWidth": 2,
            "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
@@ -323,7 +323,68 @@
            "drawStyle": "line",
            "lineWidth": 1,
            "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Transaction Overflow Rate",
+      "description": "Rate of job queue transaction overflows per minute. Overflows occur when the job queue's transaction limit is exceeded, causing transactions to be dropped. Non-zero values indicate the node is under heavy transaction load.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 32
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(xrpld_jq_trans_overflow_total{exported_instance=~\"$node\"}[5m]) * 60",
+          "legendFormat": "Overflows/min [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 1
+              },
+              {
+                "color": "red",
+                "value": 10
+              }
+            ]
+          },
+          "custom": {
+            "axisLabel": "Overflows / Min",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
@@ -391,5 +452,6 @@
  "timezone": "browser",
  "title": "Job Queue Analysis",
  "uid": "rippled-job-queue",
-  "version": 1
+  "version": 1,
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/rippled-peer-quality.json
+++ b/docker/telemetry/grafana/dashboards/rippled-peer-quality.json
@@ -202,13 +202,19 @@
            {
              "type": "value",
              "options": {
-                "0": { "text": "No", "color": "green" }
+                "0": {
+                  "text": "No",
+                  "color": "green"
+                }
              }
            },
            {
              "type": "value",
              "options": {
-                "1": { "text": "Yes", "color": "red" }
+                "1": {
+                  "text": "Yes",
+                  "color": "red"
+                }
              }
            }
          ],
@@ -387,5 +393,6 @@
    "to": "now"
  },
  "title": "Peer Quality",
-  "uid": "rippled-peer-quality"
+  "uid": "rippled-peer-quality",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
+++ b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
@@ -214,7 +214,7 @@
            "drawStyle": "line",
            "lineWidth": 2,
            "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
@@ -264,7 +264,7 @@
            "drawStyle": "line",
            "lineWidth": 1,
            "fillOpacity": 5,
-            "axisLabel": "Duration (μs)",
+            "axisLabel": "Duration (\u03bcs)",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
@@ -400,5 +400,6 @@
  "timezone": "browser",
  "title": "RPC Performance (OTel)",
  "uid": "rippled-rpc-perf",
-  "version": 1
+  "version": 1,
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/rippled-validator-health.json
+++ b/docker/telemetry/grafana/dashboards/rippled-validator-health.json
@@ -381,13 +381,19 @@
            {
              "type": "value",
              "options": {
-                "0": { "text": "OK", "color": "green" }
+                "0": {
+                  "text": "OK",
+                  "color": "green"
+                }
              }
            },
            {
              "type": "value",
              "options": {
-                "1": { "text": "BLOCKED", "color": "red" }
+                "1": {
+                  "text": "BLOCKED",
+                  "color": "red"
+                }
              }
            }
          ],
@@ -457,6 +463,139 @@
        "overrides": []
      }
    },
+    {
+      "title": "UNL Blocked",
+      "description": "Whether the node's UNL (Unique Node List) is blocked (1=blocked, 0=normal). A UNL-blocked node cannot determine validator trust and may stop participating in consensus.",
+      "type": "stat",
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 0,
+        "y": 18
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_validator_health{metric=\"unl_blocked\",exported_instance=~\"$node\"}",
+          "legendFormat": "UNL Blocked [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "mappings": [
+            {
+              "type": "value",
+              "options": {
+                "0": {
+                  "text": "OK",
+                  "color": "green"
+                }
+              }
+            },
+            {
+              "type": "value",
+              "options": {
+                "1": {
+                  "text": "BLOCKED",
+                  "color": "red"
+                }
+              }
+            }
+          ],
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          },
+          "custom": {}
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Agreement/Missed Counters (Rate)",
+      "description": "Rate of cumulative validation agreements and misses per minute. These monotonic counters complement the rolling window percentages above.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 18,
+        "x": 6,
+        "y": 18
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(xrpld_validation_agreements_total{exported_instance=~\"$node\"}[5m]) * 60",
+          "legendFormat": "Agreements/min [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(xrpld_validation_missed_total{exported_instance=~\"$node\"}[5m]) * 60",
+          "legendFormat": "Missed/min [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Per Minute",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byRegexp",
+              "options": "Missed.*"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "mode": "fixed",
+                  "fixedColor": "red"
+                }
+              }
+            ]
+          }
+        ]
+      }
+    },
    {
      "title": "--- Server State & Consensus ---",
      "type": "row",
@@ -464,7 +603,7 @@
        "h": 1,
        "w": 24,
        "x": 0,
-        "y": 18
+        "y": 26
      },
      "collapsed": false,
      "panels": []
@@ -477,7 +616,7 @@
        "h": 8,
        "w": 6,
        "x": 0,
-        "y": 19
+        "y": 27
      },
      "options": {
        "tooltip": {
@@ -510,7 +649,7 @@
        "h": 8,
        "w": 18,
        "x": 6,
-        "y": 19
+        "y": 27
      },
      "options": {
        "tooltip": {
@@ -555,7 +694,7 @@
        "h": 8,
        "w": 8,
        "x": 0,
-        "y": 27
+        "y": 35
      },
      "options": {
        "tooltip": {
@@ -588,7 +727,7 @@
        "h": 8,
        "w": 8,
        "x": 8,
-        "y": 27
+        "y": 35
      },
      "options": {
        "tooltip": {
@@ -637,7 +776,7 @@
        "h": 8,
        "w": 8,
        "x": 16,
-        "y": 27
+        "y": 35
      },
      "options": {
        "tooltip": {
@@ -710,5 +849,6 @@
    "to": "now"
  },
  "title": "Validator Health",
-  "uid": "rippled-validator-health"
+  "uid": "rippled-validator-health",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/rpc-performance.json
+++ b/docker/telemetry/grafana/dashboards/rpc-performance.json
@@ -328,7 +328,7 @@
      {
        "name": "node",
        "label": "Node",
-        "description": "Filter by rippled node (service.instance.id — e.g. Node-1)",
+        "description": "Filter by rippled node (service.instance.id \u2014 e.g. Node-1)",
        "type": "query",
        "query": "label_values(traces_span_metrics_calls_total, exported_instance)",
        "datasource": {
@@ -372,5 +372,6 @@
    "to": "now"
  },
  "title": "RPC Performance",
-  "uid": "rippled-rpc-perf"
+  "uid": "rippled-rpc-perf",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/system-ledger-data-sync.json
+++ b/docker/telemetry/grafana/dashboards/system-ledger-data-sync.json
@@ -523,5 +523,6 @@
    "to": "now"
  },
  "title": "Ledger Data & Sync (System Metrics)",
-  "uid": "rippled-system-ledger-sync"
+  "uid": "rippled-system-ledger-sync",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/system-network-traffic.json
+++ b/docker/telemetry/grafana/dashboards/system-network-traffic.json
@@ -655,6 +655,119 @@
          }
        ]
      }
+    },
+    {
+      "title": "Duplicate Traffic (Wasted Bandwidth)",
+      "description": "Rate of duplicate overlay traffic across transaction, proposal, and validation categories. Duplicate messages are messages the node has already seen and discards. High duplicate rates indicate inefficient message routing or network topology issues causing redundant relays.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 32
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_transactions_duplicate_Bytes_In{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "TX Duplicate In"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_transactions_duplicate_Bytes_Out{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "TX Duplicate Out"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_proposals_duplicate_Bytes_In{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Proposals Duplicate In"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_proposals_duplicate_Bytes_Out{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Proposals Duplicate Out"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_validations_duplicate_Bytes_In{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Validations Duplicate In"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_validations_duplicate_Bytes_Out{exported_instance=~\"$node\"}[5m])",
+          "legendFormat": "Validations Duplicate Out"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps",
+          "custom": {
+            "axisLabel": "Throughput",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "All Traffic Categories (Detail)",
+      "description": "Top 15 traffic categories by inbound byte rate, excluding the total aggregate. Provides a detailed timeseries view of which overlay message types are consuming the most bandwidth over time. Complements the bar gauge snapshot view in the Overlay Traffic panel.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 32
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(15, rate({__name__=~\"rippled_.*_Bytes_In\", __name__!~\"rippled_total_.*\", exported_instance=~\"$node\"}[5m]))",
+          "legendFormat": "{{__name__}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps",
+          "custom": {
+            "axisLabel": "Throughput",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
    }
  ],
  "schemaVersion": 39,
@@ -688,5 +801,6 @@
    "to": "now"
  },
  "title": "Network Traffic (System Metrics)",
-  "uid": "rippled-system-network"
+  "uid": "rippled-system-network",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/system-node-health.json
+++ b/docker/telemetry/grafana/dashboards/system-node-health.json
@@ -162,7 +162,7 @@
      ],
      "fieldConfig": {
        "defaults": {
-          "unit": "µs",
+          "unit": "\u00b5s",
          "custom": {
            "axisLabel": "Duration",
            "spanNulls": true,
@@ -289,7 +289,7 @@
    },
    {
      "title": "Job Queue Depth",
-      "description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough — common during ledger replay or heavy RPC load.",
+      "description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough \u2014 common during ledger replay or heavy RPC load.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
@@ -535,6 +535,116 @@
        "overrides": []
      }
    },
+    {
+      "title": "NodeStore Bytes Read/Written",
+      "description": "Cumulative bytes read and written by the NodeStore backend. Sourced from MetricsRegistry nodestore_state observable gauge with metric=node_written_bytes, node_read_bytes.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_read_bytes\"}",
+          "legendFormat": "Bytes Read [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_written_bytes\"}",
+          "legendFormat": "Bytes Written [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "decbytes",
+          "custom": {
+            "axisLabel": "Bytes",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "NodeStore Read Threads & Duration",
+      "description": "Read thread utilization and cumulative read duration. read_threads_running/read_threads_total shows thread saturation. node_reads_duration_us tracks cumulative time spent in read I/O. read_request_bundle tracks bundled read operations.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_threads_running\"}",
+          "legendFormat": "Read Threads Running [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_threads_total\"}",
+          "legendFormat": "Read Threads Total [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_request_bundle\"}",
+          "legendFormat": "Read Request Bundle [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_duration_us\"}",
+          "legendFormat": "Read Duration (\u00b5s) [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Count / \u00b5s",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
    {
      "title": "--- OTel: Cache Hit Rates ---",
      "type": "row",
@@ -542,7 +652,7 @@
        "h": 1,
        "w": 24,
        "x": 0,
-        "y": 41
+        "y": 49
      },
      "collapsed": false,
      "panels": []
@@ -555,7 +665,7 @@
        "h": 8,
        "w": 12,
        "x": 0,
-        "y": 42
+        "y": 50
      },
      "options": {
        "tooltip": {
@@ -610,13 +720,13 @@
    },
    {
      "title": "Cache Sizes",
-      "description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge.",
+      "description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge. Also includes AcceptedLedger (AL) cache size.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
-        "y": 42
+        "y": 50
      },
      "options": {
        "tooltip": {
@@ -645,6 +755,13 @@
          },
          "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}",
          "legendFormat": "FullBelow [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"AL_size\"}",
+          "legendFormat": "AcceptedLedger Size [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
@@ -670,7 +787,7 @@
        "h": 1,
        "w": 24,
        "x": 0,
-        "y": 50
+        "y": 58
      },
      "collapsed": false,
      "panels": []
@@ -683,7 +800,7 @@
        "h": 8,
        "w": 24,
        "x": 0,
-        "y": 51
+        "y": 59
      },
      "options": {
        "tooltip": {
@@ -728,7 +845,7 @@
        "h": 1,
        "w": 24,
        "x": 0,
-        "y": 59
+        "y": 67
      },
      "collapsed": false,
      "panels": []
@@ -741,7 +858,7 @@
        "h": 8,
        "w": 6,
        "x": 0,
-        "y": 60
+        "y": 68
      },
      "options": {
        "tooltip": {
@@ -764,23 +881,48 @@
          "mappings": [
            {
              "type": "value",
-              "options": { "0": { "text": "DISCONNECTED", "color": "red" } }
+              "options": {
+                "0": {
+                  "text": "DISCONNECTED",
+                  "color": "red"
+                }
+              }
            },
            {
              "type": "value",
-              "options": { "1": { "text": "CONNECTED", "color": "orange" } }
+              "options": {
+                "1": {
+                  "text": "CONNECTED",
+                  "color": "orange"
+                }
+              }
            },
            {
              "type": "value",
-              "options": { "2": { "text": "SYNCING", "color": "yellow" } }
+              "options": {
+                "2": {
+                  "text": "SYNCING",
+                  "color": "yellow"
+                }
+              }
            },
            {
              "type": "value",
-              "options": { "3": { "text": "TRACKING", "color": "blue" } }
+              "options": {
+                "3": {
+                  "text": "TRACKING",
+                  "color": "blue"
+                }
+              }
            },
            {
              "type": "value",
-              "options": { "4": { "text": "FULL", "color": "green" } }
+              "options": {
+                "4": {
+                  "text": "FULL",
+                  "color": "green"
+                }
+              }
            }
          ],
          "custom": {}
@@ -796,7 +938,7 @@
        "h": 8,
        "w": 6,
        "x": 6,
-        "y": 60
+        "y": 68
      },
      "options": {
        "tooltip": {
@@ -829,7 +971,7 @@
        "h": 8,
        "w": 6,
        "x": 12,
-        "y": 60
+        "y": 68
      },
      "options": {
        "tooltip": {
@@ -862,7 +1004,7 @@
        "h": 8,
        "w": 6,
        "x": 18,
-        "y": 60
+        "y": 68
      },
      "options": {
        "tooltip": {
@@ -895,7 +1037,7 @@
        "h": 8,
        "w": 12,
        "x": 0,
-        "y": 68
+        "y": 76
      },
      "options": {
        "tooltip": {
@@ -942,7 +1084,7 @@
        "h": 8,
        "w": 12,
        "x": 12,
-        "y": 68
+        "y": 76
      },
      "options": {
        "tooltip": {
@@ -968,6 +1110,80 @@
        "overrides": []
      }
    },
+    {
+      "title": "Current Ledger Index",
+      "description": "Current open ledger sequence number. The gap between this and validated_ledger_seq represents ledgers in flight.",
+      "type": "stat",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 84
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"ledger_current_index\"}",
+          "legendFormat": "Current Ledger [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {}
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "NuDB Storage Size",
+      "description": "NuDB backend file size in bytes. Sourced from MetricsRegistry storage_detail observable gauge. Tracks database growth over time.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 84
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "xrpld_storage_detail{exported_instance=~\"$node\", metric=\"nudb_bytes\"}",
+          "legendFormat": "NuDB Size [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "decbytes",
+          "custom": {
+            "axisLabel": "Size",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
    {
      "title": "--- OTel: Complete Ledgers & DB ---",
      "type": "row",
@@ -975,7 +1191,7 @@
        "h": 1,
        "w": 24,
        "x": 0,
-        "y": 76
+        "y": 92
      },
      "collapsed": false,
      "panels": []
@@ -988,7 +1204,7 @@
        "h": 8,
        "w": 12,
        "x": 0,
-        "y": 77
+        "y": 93
      },
      "options": {
        "showHeader": true
@@ -1020,7 +1236,7 @@
        "h": 8,
        "w": 12,
        "x": 12,
-        "y": 77
+        "y": 93
      },
      "options": {
        "tooltip": {
@@ -1075,7 +1291,7 @@
        "h": 8,
        "w": 12,
        "x": 0,
-        "y": 85
+        "y": 101
      },
      "options": {
        "tooltip": {
@@ -1108,7 +1324,7 @@
        "h": 8,
        "w": 12,
        "x": 12,
-        "y": 85
+        "y": 101
      },
      "options": {
        "tooltip": {
@@ -1147,7 +1363,7 @@
        "h": 1,
        "w": 24,
        "x": 0,
-        "y": 93
+        "y": 109
      },
      "collapsed": false,
      "panels": []
@@ -1160,7 +1376,7 @@
        "h": 8,
        "w": 6,
        "x": 0,
-        "y": 94
+        "y": 110
      },
      "options": {
        "tooltip": {
@@ -1193,7 +1409,7 @@
        "h": 8,
        "w": 6,
        "x": 6,
-        "y": 94
+        "y": 110
      },
      "options": {
        "tooltip": {
@@ -1226,7 +1442,7 @@
        "h": 8,
        "w": 6,
        "x": 12,
-        "y": 94
+        "y": 110
      },
      "options": {
        "tooltip": {
@@ -1259,7 +1475,7 @@
        "h": 8,
        "w": 6,
        "x": 18,
-        "y": 94
+        "y": 110
      },
      "options": {
        "tooltip": {
@@ -1308,7 +1524,7 @@
        "h": 8,
        "w": 24,
        "x": 0,
-        "y": 102
+        "y": 118
      },
      "options": {
        "tooltip": {
@@ -1344,6 +1560,497 @@
        },
        "overrides": []
      }
+    },
+    {
+      "title": "--- Extended Metrics (Recovered from Phase 6) ---",
+      "type": "row",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 32
+      },
+      "collapsed": false,
+      "panels": []
+    },
+    {
+      "title": "Key Jobs Execution Time",
+      "description": "Execution time for critical job types at the selected quantile. Sourced from per-job-type events in JobTypeData (JobTypeData.h:48). Shows how long key consensus, transaction, and maintenance jobs take to execute. Spikes indicate processing bottlenecks.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 33
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_acceptLedger{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Accept Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_advanceLedger{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Advance Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_transaction{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Transaction [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_writeObjects{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Write Objects [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_heartbeat{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Heartbeat [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_sweep{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Sweep [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedValidation{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Validation [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedProposal{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Proposal [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_publishNewLedger{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Publish New Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_clientRPC{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Client RPC [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_ledgerData{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Ledger Data [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Duration (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Key Jobs Dequeue Wait Time",
+      "description": "Time spent waiting in the job queue before execution for critical job types. Sourced from per-job-type dequeue events (JobTypeData.h:47). High dequeue times indicate the job queue is backlogged and jobs are waiting too long to be scheduled.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 33
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_acceptLedger_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Accept Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_advanceLedger_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Advance Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_transaction_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Transaction [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_writeObjects_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Write Objects [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_heartbeat_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Heartbeat [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_sweep_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Sweep [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedValidation_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Validation [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_trustedProposal_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Trusted Proposal [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_publishNewLedger_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Publish New Ledger [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_clientRPC_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Client RPC [{{quantile}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_ledgerData_q{quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "Ledger Data [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Wait Time (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "FullBelowCache Size",
+      "description": "Number of entries in the FullBelowCache. Sourced from the TaggedCache size gauge (TaggedCache.h:183) for the Node family full below cache (NodeFamily.cpp:29). This cache tracks which SHAMap nodes have all children present locally, avoiding redundant fetches during ledger acquisition.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_Node_family_full_below_cache_size{exported_instance=~\"$node\"}",
+          "legendFormat": "FullBelowCache Size"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short",
+          "custom": {
+            "axisLabel": "Entries",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "FullBelowCache Hit Rate",
+      "description": "Hit rate percentage for the FullBelowCache. Sourced from the TaggedCache hit_rate gauge (TaggedCache.h:184). A high hit rate means the node is efficiently reusing cached knowledge about complete SHAMap subtrees. Low hit rates during steady state warrant investigation.",
+      "type": "gauge",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 41
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_Node_family_full_below_cache_hit_rate{exported_instance=~\"$node\"}",
+          "legendFormat": "Hit Rate"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent",
+          "min": 0,
+          "max": 100,
+          "thresholds": {
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 25
+              },
+              {
+                "color": "green",
+                "value": 50
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Ledger Publish Gap",
+      "description": "Difference between published and validated ledger ages. Computed as Published_Ledger_Age minus Validated_Ledger_Age. A value near zero means the publish pipeline keeps up with validation. A growing gap indicates the publish pipeline is falling behind, potentially causing stale data for subscribers.",
+      "type": "stat",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 49
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_LedgerMaster_Published_Ledger_Age{exported_instance=~\"$node\"} - rippled_LedgerMaster_Validated_Ledger_Age{exported_instance=~\"$node\"}",
+          "legendFormat": "Publish Gap"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s",
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 5
+              },
+              {
+                "color": "red",
+                "value": 10
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "State Duration Rate (Full vs Tracking)",
+      "description": "Rate of change of time spent in Full and Tracking operating modes, normalized to seconds. Sourced from State_Accounting duration gauges (NetworkOPs.cpp:774-778). In steady state the Full duration rate should be close to 1.0 (gaining one second of Full-mode time per wall-clock second). A drop below 1.0 means the node is spending time in other modes.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 49
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_State_Accounting_Full_duration{exported_instance=~\"$node\"}[5m]) / 1000000",
+          "legendFormat": "Full Mode Rate"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rate(rippled_State_Accounting_Tracking_duration{exported_instance=~\"$node\"}[5m]) / 1000000",
+          "legendFormat": "Tracking Mode Rate"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short",
+          "custom": {
+            "axisLabel": "Rate (s/s)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "All Jobs Execution Time (Detail)",
+      "description": "Execution time for ALL non-special job types at the selected quantile. Shows the complete picture of job execution performance. Use the Key Jobs panel for a focused view of the most critical jobs.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 57
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "{__name__=~\"rippled_(makeFetchPack|publishAcqLedger|untrustedValidation|manifest|localTransaction|ledgerReplayRequest|ledgerRequest|untrustedProposal|ledgerReplayTask|ledgerData|clientCommand|clientSubscribe|clientFeeChange|clientConsensus|clientAccountHistory|clientRPC|clientWebsocket|RPC|updatePaths|transaction|batch|advanceLedger|publishNewLedger|fetchTxnData|writeAhead|trustedValidation|writeObjects|acceptLedger|trustedProposal|sweep|clusterReport|heartbeat|administration|handleHaveTransactions|doTransactions)\", quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "{{__name__}} [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Duration (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "All Jobs Dequeue Wait (Detail)",
+      "description": "Dequeue wait time for ALL non-special job types at the selected quantile. Shows the complete picture of job queue waiting times. High wait times across many job types indicate systemic job queue congestion.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 65
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "{__name__=~\"rippled_(makeFetchPack_q|publishAcqLedger_q|untrustedValidation_q|manifest_q|localTransaction_q|ledgerReplayRequest_q|ledgerRequest_q|untrustedProposal_q|ledgerReplayTask_q|ledgerData_q|clientCommand_q|clientSubscribe_q|clientFeeChange_q|clientConsensus_q|clientAccountHistory_q|clientRPC_q|clientWebsocket_q|RPC_q|updatePaths_q|transaction_q|batch_q|advanceLedger_q|publishNewLedger_q|fetchTxnData_q|writeAhead_q|trustedValidation_q|writeObjects_q|acceptLedger_q|trustedProposal_q|sweep_q|clusterReport_q|heartbeat_q|administration_q|handleHaveTransactions_q|doTransactions_q)\", quantile=\"$quantile\", exported_instance=~\"$node\"}",
+          "legendFormat": "{{__name__}} [{{quantile}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Wait Time (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
    }
  ],
  "schemaVersion": 39,
@@ -1397,5 +2104,6 @@
    "to": "now"
  },
  "title": "Node Health (System Metrics)",
-  "uid": "rippled-system-node-health"
+  "uid": "rippled-system-node-health",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/system-overlay-traffic-detail.json
+++ b/docker/telemetry/grafana/dashboards/system-overlay-traffic-detail.json
@@ -583,5 +583,6 @@
    "to": "now"
  },
  "title": "Overlay Traffic Detail (System Metrics)",
-  "uid": "rippled-system-overlay-detail"
+  "uid": "rippled-system-overlay-detail",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/system-rpc-pathfinding.json
+++ b/docker/telemetry/grafana/dashboards/system-rpc-pathfinding.json
@@ -413,5 +413,6 @@
    "to": "now"
  },
  "title": "RPC & Pathfinding (System Metrics)",
-  "uid": "rippled-system-rpc"
+  "uid": "rippled-system-rpc",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/dashboards/transaction-overview.json
+++ b/docker/telemetry/grafana/dashboards/transaction-overview.json
@@ -147,8 +147,8 @@
          "datasource": {
            "type": "prometheus"
          },
-          "expr": "sum by (xrpl_tx_suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", exported_instance=~\"$node\"}[$__rate_interval]))",
-          "legendFormat": "Suppressed={{xrpl_tx_suppressed}} [{{exported_instance}}]"
+          "expr": "sum by (suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", exported_instance=~\"$node\"}[$__rate_interval]))",
+          "legendFormat": "Suppressed={{suppressed}} [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
@@ -336,7 +336,7 @@
      {
        "name": "node",
        "label": "Node",
-        "description": "Filter by rippled node (service.instance.id — e.g. Node-1)",
+        "description": "Filter by rippled node (service.instance.id \u2014 e.g. Node-1)",
        "type": "query",
        "query": "label_values(traces_span_metrics_calls_total, exported_instance)",
        "datasource": {
@@ -380,5 +380,6 @@
    "to": "now"
  },
  "title": "Transaction Overview",
-  "uid": "rippled-transactions"
+  "uid": "rippled-transactions",
+  "refresh": "5s"
 }
--- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
@@ -116,16 +116,16 @@ datasources:
            operator: "="
            scope: span
            type: dynamic
-          # Phase 2: Node health filters (Task 2.8)
+          # Phase 2: Node health filters (Task 2.8) — resource attributes
          - id: node-amendment-blocked
            tag: xrpl.node.amendment_blocked
            operator: "="
-            scope: span
+            scope: resource
            type: static
          - id: node-server-state
            tag: xrpl.node.server_state
            operator: "="
-            scope: span
+            scope: resource
            type: dynamic
          # Phase 3: Transaction tracing filters
          - id: tx-hash
@@ -134,12 +134,12 @@ datasources:
            scope: span
            type: static
          - id: tx-origin
-            tag: xrpl.tx.local
+            tag: local
            operator: "="
            scope: span
            type: dynamic
          - id: tx-status
-            tag: xrpl.tx.status
+            tag: tx_status
            operator: "="
            scope: span
            type: dynamic
@@ -155,42 +155,42 @@ datasources:
            scope: span
            type: dynamic
          - id: consensus-ledger-seq
-            tag: xrpl.consensus.ledger.seq
+            tag: xrpl.ledger.seq
            operator: "="
            scope: span
            type: static
          - id: consensus-close-time-correct
-            tag: xrpl.consensus.close_time_correct
+            tag: close_time_correct
            operator: "="
            scope: span
            type: dynamic
          - id: consensus-state
-            tag: xrpl.consensus.state
+            tag: consensus_state
            operator: "="
            scope: span
            type: dynamic
          - id: consensus-close-resolution
-            tag: xrpl.consensus.close_resolution_ms
+            tag: close_resolution_ms
            operator: "="
            scope: span
            type: dynamic
          - id: consensus-proposers
-            tag: xrpl.consensus.proposers
+            tag: proposers
            operator: "="
            scope: span
            type: dynamic
          - id: consensus-result
-            tag: xrpl.consensus.result
+            tag: consensus_result
            operator: "="
            scope: span
            type: dynamic
          - id: consensus-mode-old
-            tag: xrpl.consensus.mode.old
+            tag: mode_old
            operator: "="
            scope: span
            type: dynamic
          - id: consensus-mode-new
-            tag: xrpl.consensus.mode.new
+            tag: mode_new
            operator: "="
            scope: span
            type: dynamic
@@ -201,17 +201,17 @@ datasources:
            type: static
          # Phase 3/4: Additional transaction and queue filters
          - id: tx-path
-            tag: xrpl.tx.path
+            tag: path
            operator: "="
            scope: span
            type: dynamic
          - id: tx-suppressed
-            tag: xrpl.tx.suppressed
+            tag: suppressed
            operator: "="
            scope: span
            type: dynamic
          - id: peer-version
-            tag: xrpl.peer.version
+            tag: peer_version
            operator: "="
            scope: span
            type: dynamic
--- a/docker/telemetry/otel-collector-config.yaml
+++ b/docker/telemetry/otel-collector-config.yaml
@@ -69,9 +69,9 @@ connectors:
      - name: xrpl.consensus.mode
      - name: close_time_correct
      - name: local
-      - name: xrpl.tx.suppressed
-      - name: xrpl.peer.proposal.trusted
-      - name: xrpl.peer.validation.trusted
+      - name: suppressed
+      - name: proposal_trusted
+      - name: validation_trusted

 exporters:
  debug:
--- a/docker/telemetry/validators-devnet.txt
+++ b/docker/telemetry/validators-devnet.txt
@@ -0,0 +1,11 @@
+# Devnet validator list configuration.
+#
+# Uses the Devnet validator list publisher operated by Ripple.
+# xrpld automatically fetches the current set of trusted validators
+# for the XRP Ledger Devnet from the site listed below.
+
+[validator_list_sites]
+https://vl.devnet.rippletest.net
+
+[validator_list_keys]
+EDBB54B0D9AEE071BB37784AF5A9E7CC49AC7A0EFCE868C54532BCB966B9CFC13B
--- a/docker/telemetry/xrpld-telemetry.cfg
+++ b/docker/telemetry/xrpld-telemetry.cfg
@@ -1,17 +1,25 @@
-# Standalone xrpld configuration with OpenTelemetry enabled.
+# xrpld configuration for Devnet with full OpenTelemetry tracing.
+#
+# Connects to the XRP Ledger Devnet and exercises ALL instrumented
+# workflows: RPC, transactions, consensus, peer overlay, ledger ops,
+# and pathfinding.
 #
 # Usage:
 #   1. Start the observability stack:
 #        docker compose -f docker/telemetry/docker-compose.yml up -d
-#   2. Run xrpld in standalone mode:
-#        ./xrpld --conf docker/telemetry/xrpld-telemetry.cfg -a --start
-#   3. Send RPC commands to exercise tracing:
+#   2. Run xrpld:
+#        ./xrpld --conf docker/telemetry/xrpld-telemetry.cfg
+#   3. Wait for sync (server_state=full), then exercise workflows:
 #        curl -s http://localhost:5005 -d '{"method":"server_info"}'
 #   4. View traces in Grafana Explore -> Tempo: http://localhost:3000

+# --- Server ports -----------------------------------------------------------
+
 [server]
 port_rpc_admin_local
 port_ws_admin_local
+port_ws_public
+port_peer

 [port_rpc_admin_local]
 port = 5005
@@ -25,29 +33,90 @@ ip = 127.0.0.1
 admin = 127.0.0.1
 protocol = ws

+[port_ws_public]
+port = 6005
+ip = 0.0.0.0
+protocol = ws
+
+[port_peer]
+port = 51235
+ip = 0.0.0.0
+protocol = peer
+
+# --- Network ----------------------------------------------------------------
+
+[network_id]
+devnet
+
+[ips]
+s.devnet.rippletest.net 51235
+
+[validators_file]
+validators-devnet.txt
+
+[peer_private]
+0
+
+[peers_max]
+21
+
+# --- Pathfinding (exercises ripple_path_find / path_find workflows) ---------
+
+[path_search]
+7
+
+[path_search_fast]
+2
+
+[path_search_max]
+10
+
+# --- Signing (allows sign/sign_for RPC for test tx submission) --------------
+
+[signing_support]
+true
+
+# --- Database ---------------------------------------------------------------
+
 [node_db]
 type=NuDB
 path=docker/telemetry/data/nudb
-online_delete=256
+online_delete=2000
 advisory_delete=0

 [database_path]
 docker/telemetry/data

+[ledger_history]
+1000
+
+# --- Logging ----------------------------------------------------------------
+
 [debug_logfile]
-docker/telemetry/data/debug.log
+/tmp/xrpld-integration/devnet/debug.log

 [rpc_startup]
 { "command": "log_level", "severity": "debug" }

+# --- SSL --------------------------------------------------------------------
+
 [ssl_verify]
 0

-# --- OpenTelemetry tracing ---
+# --- Insight (native OTel metrics via beast::insight) -----------------------
+
+[insight]
+server=otel
+endpoint=http://localhost:4318/v1/metrics
+prefix=rippled
+
+# --- OpenTelemetry tracing --------------------------------------------------
+
 [telemetry]
 enabled=1
-service_instance_id=xrpld-standalone
+service_instance_id=xrpld-devnet
 endpoint=http://localhost:4318/v1/traces
+metrics_endpoint=http://localhost:4318/v1/metrics
 exporter=otlp_http
 sampling_ratio=1.0
 batch_size=512
@@ -56,5 +125,5 @@ max_queue_size=2048
 trace_rpc=1
 trace_transactions=1
 trace_consensus=1
-trace_peer=0
+trace_peer=1
 trace_ledger=1
--- a/src/libxrpl/telemetry/SpanGuard.cpp
+++ b/src/libxrpl/telemetry/SpanGuard.cpp
@@ -142,6 +142,32 @@ isCategoryEnabled(Telemetry const& tel, TraceCategory cat)
    return false;  // unreachable, silences compiler warning
 }

+namespace {
+
+// Translate a TraceCategory into the OTel SpanKind passed to startSpan(),
+// so Tempo's service-graph / RED metrics see the correct direction.
+// Rpc -> kServer (spans start at handler dispatch, the server entry point);
+// Peer -> kConsumer (spans start at inbound-message receipt);
+// Transactions / Consensus / Ledger are internal work -> default kInternal.
+otel_trace::SpanKind
+categoryToSpanKind(TraceCategory cat)
+{
+    switch (cat)
+    {
+        case TraceCategory::Rpc:
+            return otel_trace::SpanKind::kServer;
+        case TraceCategory::Peer:
+            return otel_trace::SpanKind::kConsumer;
+        case TraceCategory::Transactions:
+        case TraceCategory::Consensus:
+        case TraceCategory::Ledger:
+            return otel_trace::SpanKind::kInternal;
+    }
+    return otel_trace::SpanKind::kInternal;  // unreachable, silences compiler warning
+}
+
+}  // namespace
+
 SpanGuard
 SpanGuard::span(TraceCategory cat, std::string_view prefix, std::string_view name)
 {
@@ -149,7 +175,7 @@ SpanGuard::span(TraceCategory cat, std::string_view prefix, std::string_view nam
    if (!tel || !tel->isEnabled() || !isCategoryEnabled(*tel, cat))
        return {};
    auto fullName = std::string(prefix) + "." + std::string(name);
-    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName)));
+    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, categoryToSpanKind(cat))));
 }

 // ===== Child / linked span creation ========================================