From 892fee638afcd5c7a9e1459d02ed8eaf0bee53a5 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Fri, 20 Mar 2026 17:23:06 +0000
Subject: [PATCH] Phase 9: Metric gap fill - nodestore, cache, TxQ, load factor
 dashboards

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../scripts/levelization/results/loops.txt    |   3 +
 .../scripts/levelization/results/ordering.txt |   7 +-
 OpenTelemetryPlan/06-implementation-phases.md | 313 ++++++++++-
 OpenTelemetryPlan/08-appendix.md              |  64 ++-
 .../09-data-collection-reference.md           | 335 +++++++++++-
 OpenTelemetryPlan/Phase10_taskList.md         | 242 +++++++++
 OpenTelemetryPlan/Phase11_taskList.md         | 453 ++++++++++++++++
 OpenTelemetryPlan/Phase9_taskList.md          | 312 +++++++++++
 .../dashboards/rippled-fee-market.json        | 343 ++++++++++++
 .../grafana/dashboards/rippled-job-queue.json | 395 ++++++++++++++
 .../grafana/dashboards/rippled-rpc-perf.json  | 404 ++++++++++++++
 .../dashboards/system-node-health.json        | 349 +++++++++++-
 docker/telemetry/integration-test.sh          |  48 ++
 include/xrpl/core/ServiceRegistry.h           |   9 +-
 src/tests/libxrpl/CMakeLists.txt              |   9 +
 .../libxrpl/telemetry/MetricsRegistry.cpp     | 346 ++++++++++++
 src/xrpld/app/main/Application.cpp            |  37 ++
 src/xrpld/perflog/detail/PerfLogImp.cpp       |  18 +
 src/xrpld/telemetry/MetricsRegistry.cpp       | 513 ++++++++++++++++++
 src/xrpld/telemetry/MetricsRegistry.h         | 284 ++++++++++
 20 files changed, 4453 insertions(+), 31 deletions(-)
 create mode 100644 OpenTelemetryPlan/Phase10_taskList.md
 create mode 100644 OpenTelemetryPlan/Phase11_taskList.md
 create mode 100644 OpenTelemetryPlan/Phase9_taskList.md
 create mode 100644 docker/telemetry/grafana/dashboards/rippled-fee-market.json
 create mode 100644 docker/telemetry/grafana/dashboards/rippled-job-queue.json
 create mode 100644 docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
 create mode 100644 src/tests/libxrpl/telemetry/MetricsRegistry.cpp
 create mode 100644 src/xrpld/telemetry/MetricsRegistry.cpp
 create mode 100644 src/xrpld/telemetry/MetricsRegistry.h

diff --git a/.github/scripts/levelization/results/loops.txt b/.github/scripts/levelization/results/loops.txt
index 7914704f9d..1110b0b298 100644
--- a/.github/scripts/levelization/results/loops.txt
+++ b/.github/scripts/levelization/results/loops.txt
@@ -16,6 +16,9 @@ Loop: xrpld.app xrpld.rpc
 Loop: xrpld.app xrpld.shamap
   xrpld.shamap ~= xrpld.app
 
+Loop: xrpld.app xrpld.telemetry
+  xrpld.telemetry ~= xrpld.app
+
 Loop: xrpld.overlay xrpld.rpc
   xrpld.rpc ~= xrpld.overlay
 
diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt
index 7a8023d61c..2e7ff014fd 100644
--- a/.github/scripts/levelization/results/ordering.txt
+++ b/.github/scripts/levelization/results/ordering.txt
@@ -180,6 +180,7 @@ test.toplevel > xrpl.json
 test.unit_test > xrpl.basics
 test.unit_test > xrpl.protocol
 tests.libxrpl > xrpl.basics
+tests.libxrpl > xrpl.core
 tests.libxrpl > xrpld.telemetry
 tests.libxrpl > xrpl.json
 tests.libxrpl > xrpl.net
@@ -229,7 +230,6 @@ xrpld.app > xrpl.basics
 xrpld.app > xrpl.core
 xrpld.app > xrpld.consensus
 xrpld.app > xrpld.core
-xrpld.app > xrpld.telemetry
 xrpld.app > xrpl.json
 xrpld.app > xrpl.ledger
 xrpld.app > xrpl.net
@@ -271,6 +271,7 @@ xrpld.peerfinder > xrpl.rdb
 xrpld.perflog > xrpl.basics
 xrpld.perflog > xrpl.core
 xrpld.perflog > xrpld.rpc
+xrpld.perflog > xrpld.telemetry
 xrpld.perflog > xrpl.json
 xrpld.rpc > xrpl.basics
 xrpld.rpc > xrpl.core
@@ -286,4 +287,8 @@ xrpld.rpc > xrpl.resource
 xrpld.rpc > xrpl.server
 xrpld.rpc > xrpl.tx
 xrpld.shamap > xrpl.shamap
+xrpld.telemetry > xrpl.basics
+xrpld.telemetry > xrpl.core
+xrpld.telemetry > xrpl.nodestore
+xrpld.telemetry > xrpl.server
 xrpld.telemetry > xrpl.telemetry
diff --git a/OpenTelemetryPlan/06-implementation-phases.md b/OpenTelemetryPlan/06-implementation-phases.md
index 1ae9ce59a3..75e62895c2 100644
--- a/OpenTelemetryPlan/06-implementation-phases.md
+++ b/OpenTelemetryPlan/06-implementation-phases.md
@@ -63,6 +63,15 @@ gantt
 
     section Phase 8
     Log-Trace Correlation     :p8, after p7, 1w
+
+    section Phase 9 (Future)
+    Internal Metric Gap Fill  :p9, after p8, 2.5w
+
+    section Phase 10 (Future)
+    Workload Validation       :p10, after p9, 2w
+
+    section Phase 11 (Future)
+    Third-Party Collection    :p11, after p10, 3w
 ```
 
 ---
@@ -656,6 +665,266 @@ flowchart LR
 
 ---
 
+## 6.8.2 Phase 9: Internal Metric Instrumentation Gap Fill (Weeks 14-15) — Future Enhancement
+
+> **Status**: Planned, not yet implemented.
+
+### Motivation
+
+Phases 1-8 establish trace spans, StatsD metrics bridge, native OTel metrics, and log-trace correlation. However, more than 50 metrics that exist inside rippled's `get_counts`, `server_info`, TxQ, PerfLog, and `CountedObject` systems have **no time-series export path**. These are the metrics that exchanges, payment processors, analytics providers, validators, and researchers need most — NodeStore I/O performance, cache hit rates, per-RPC-method counters, transaction queue depth, fee escalation levels, and live object instance counts.
+
+### Architecture
+
+Hybrid approach — two instrumentation strategies based on proximity to existing code:
+
+```mermaid
+flowchart TB
+    subgraph rippled["rippled process"]
+        subgraph existing["Existing beast::insight registrations"]
+            NS["NodeStore I/O<br/>(Database.cpp)"]
+        end
+        subgraph newreg["New OTel MetricsRegistry"]
+            CR["Cache Hit Rates<br/>(async gauge callbacks)"]
+            TQ["TxQ Metrics<br/>(async gauge callbacks)"]
+            PL["PerfLog RPC/Job<br/>(counters + histograms)"]
+            CO["CountedObjects<br/>(async gauge callbacks)"]
+            LF["Load Factors<br/>(async gauge callbacks)"]
+        end
+    end
+
+    subgraph export["Export Pipelines"]
+        BI["beast::insight<br/>OTelCollector (Phase 7)"]
+        OS["OTel Metrics SDK<br/>PeriodicMetricReader"]
+    end
+
+    NS --> BI
+    CR --> OS
+    TQ --> OS
+    PL --> OS
+    CO --> OS
+    LF --> OS
+
+    BI --> OTLP["OTLP/HTTP :4318<br/>/v1/metrics"]
+    OS --> OTLP
+
+    style rippled fill:#1a2633,color:#ccc,stroke:#4a90d9
+    style existing fill:#2a4a6b,color:#fff,stroke:#4a90d9
+    style newreg fill:#2a4a6b,color:#fff,stroke:#4a90d9
+    style export fill:#1a3320,color:#ccc,stroke:#5cb85c
+    style NS fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style CR fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style TQ fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style PL fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style CO fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style LF fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style BI fill:#449d44,color:#fff,stroke:#2d6e2d
+    style OS fill:#449d44,color:#fff,stroke:#2d6e2d
+    style OTLP fill:#f0ad4e,color:#000,stroke:#c78c2e
+```
+
+- **beast::insight extensions** (blue): NodeStore I/O metrics added near existing `Database.cpp` registrations — exported via Phase 7's `OTelCollector`.
+- **OTel MetricsRegistry** (green): New centralized class using `ObservableGauge` async callbacks for cache, TxQ, PerfLog, CountedObjects, and load factors — polled at 10s intervals by `PeriodicMetricReader`.
+
+### Third-Party Consumer Context
+
+| Consumer Category      | Key Metrics They Need From Phase 9                              |
+| ---------------------- | --------------------------------------------------------------- |
+| Exchanges              | Fee escalation levels, TxQ depth, settlement latency            |
+| Payment Processors     | Load factors, io_latency, transaction throughput                |
+| Analytics Providers    | NodeStore I/O, cache hit rates, counted objects                 |
+| Validators / Operators | Per-job execution times, PerfLog RPC counters, consensus timing |
+| Academic Researchers   | Consensus performance time-series, fee market dynamics          |
+| Institutional Custody  | Server health scores, reserve calculations, node availability   |
+
+### Tasks
+
+| Task | Description                               |
+| ---- | ----------------------------------------- |
+| 9.1  | NodeStore I/O metrics                     |
+| 9.2  | Cache hit rate metrics + MetricsRegistry  |
+| 9.3  | TxQ metrics                               |
+| 9.4  | PerfLog per-RPC metrics                   |
+| 9.5  | PerfLog per-job metrics                   |
+| 9.6  | Counted object instance metrics           |
+| 9.7  | Fee escalation & load factor metrics      |
+| 9.8  | New Grafana dashboards (2 new, 2 updated) |
+| 9.9  | Update documentation                      |
+| 9.10 | Integration tests                         |
+
+See [Phase9_taskList.md](./Phase9_taskList.md) for detailed per-task breakdown.
+
+### Exit Criteria
+
+- [ ] All ~50 new metrics visible in Prometheus via OTLP pipeline
+- [ ] `MetricsRegistry` class registers/deregisters cleanly with OTel SDK
+- [ ] 2 new Grafana dashboards operational (Fee Market, Job Queue)
+- [ ] No performance regression (< 0.5% CPU overhead from new callbacks)
+- [ ] Documentation updated with full new metric inventory
+
+---
+
+## 6.8.3 Phase 10: Synthetic Workload Generation & Telemetry Validation (Weeks 16-17) — Future Enhancement
+
+> **Status**: Planned, not yet implemented.
+
+### Motivation
+
+Before the telemetry stack (Phases 1-9) can be considered production-ready, we need automated proof that all 16 spans, 22 attributes, 300+ metrics, 10 Grafana dashboards, and log-trace correlation work correctly under realistic load. This phase establishes a reusable CI-integrated validation suite and performance benchmark baseline.
+
+### Architecture
+
+```mermaid
+flowchart LR
+    subgraph harness["Docker Compose Workload Harness"]
+        direction TB
+        V1["Validator 1"] ~~~ V2["Validator 2"] ~~~ V3["Validator 3"]
+        V4["Validator 4"] ~~~ V5["Validator 5"]
+    end
+
+    subgraph generators["Workload Generators"]
+        RPC["RPC Load Generator<br/>(configurable RPS,<br/>command distribution)"]
+        TX["Transaction Submitter<br/>(Payment, Offer, NFT,<br/>Escrow, AMM mix)"]
+    end
+
+    subgraph validation["Validation Suite"]
+        SV["Span Validator<br/>(Jaeger/Tempo API)"]
+        MV["Metric Validator<br/>(Prometheus API)"]
+        LV["Log-Trace Validator<br/>(Loki API)"]
+        DV["Dashboard Validator<br/>(Grafana API)"]
+        BM["Benchmark Suite<br/>(CPU, memory, latency<br/>ON vs OFF comparison)"]
+    end
+
+    generators --> harness
+    harness --> validation
+
+    style harness fill:#1a2633,color:#ccc,stroke:#4a90d9
+    style generators fill:#1a3320,color:#ccc,stroke:#5cb85c
+    style validation fill:#332a1a,color:#ccc,stroke:#f0ad4e
+    style V1 fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style V2 fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style V3 fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style V4 fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style V5 fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style RPC fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style TX fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style SV fill:#f0ad4e,color:#000,stroke:#c78c2e
+    style MV fill:#f0ad4e,color:#000,stroke:#c78c2e
+    style LV fill:#f0ad4e,color:#000,stroke:#c78c2e
+    style DV fill:#f0ad4e,color:#000,stroke:#c78c2e
+    style BM fill:#f0ad4e,color:#000,stroke:#c78c2e
+```
+
+### Tasks
+
+| Task | Description                            |
+| ---- | -------------------------------------- |
+| 10.1 | Multi-node test harness (5 validators) |
+| 10.2 | RPC load generator                     |
+| 10.3 | Transaction submitter (6+ tx types)    |
+| 10.4 | Telemetry validation suite             |
+| 10.5 | Performance benchmark suite            |
+| 10.6 | CI integration                         |
+| 10.7 | Documentation                          |
+
+See [Phase10_taskList.md](./Phase10_taskList.md) for detailed per-task breakdown.
+
+### Exit Criteria
+
+- [ ] 5-node validator cluster starts and reaches consensus in docker-compose
+- [ ] Validation suite confirms all 16 spans, 22 attributes, 300+ metrics
+- [ ] All 10 Grafana dashboards render data (no empty panels)
+- [ ] Benchmark shows < 3% CPU overhead, < 5MB memory overhead
+- [ ] CI workflow runs validation on telemetry branch changes
+
+---
+
+## 6.8.4 Phase 11: Third-Party Data Collection Pipelines (Weeks 18-20) — Future Enhancement
+
+> **Status**: Planned, not yet implemented.
+
+### Motivation
+
+rippled has no native Prometheus/OTLP metrics export for data accessible only via JSON-RPC (`server_info`, `get_counts`, `fee`, `peers`, `validators`, `feature`). Every external consumer — exchanges, payment processors, analytics providers, validators, compliance firms, DeFi protocols, researchers, custodians, and CBDC platforms — must build custom JSON-RPC polling and conversion pipelines. This phase centralizes that work into a reusable custom OTel Collector receiver.
+
+### Architecture
+
+```mermaid
+flowchart LR
+    subgraph receiver["Custom OTel Collector Receiver (Go)"]
+        direction TB
+        SI["server_info<br/>collector"]
+        GC["get_counts<br/>collector"]
+        FE["fee<br/>collector"]
+        PE["peers<br/>collector"]
+        VA["validators<br/>collector"]
+        DX["DEX/AMM<br/>collector<br/>(optional)"]
+    end
+
+    rippled["rippled<br/>Admin RPC<br/>:5005"] -->|"JSON-RPC<br/>poll every 30s"| receiver
+
+    receiver -->|"xrpl_* metrics"| PROM["Prometheus<br/>:9090"]
+    receiver -->|"OTLP export"| OTLP["Any OTLP-<br/>compatible<br/>backend"]
+
+    PROM --> GF["Grafana<br/>4 new dashboards"]
+    PROM --> AL["Prometheus<br/>Alerting Rules"]
+
+    style receiver fill:#1a3320,color:#ccc,stroke:#5cb85c
+    style SI fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style GC fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style FE fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style PE fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style VA fill:#5cb85c,color:#fff,stroke:#3d8b3d
+    style DX fill:#449d44,color:#fff,stroke:#2d6e2d
+    style rippled fill:#4a90d9,color:#fff,stroke:#2a6db5
+    style PROM fill:#f0ad4e,color:#000,stroke:#c78c2e
+    style OTLP fill:#f0ad4e,color:#000,stroke:#c78c2e
+    style GF fill:#5bc0de,color:#000,stroke:#3aa8c1
+    style AL fill:#d9534f,color:#fff,stroke:#b52d2d
+```
+
+### Third-Party Consumer Gap Analysis
+
+| Consumer Category      | Data Unlocked by Phase 11                                    |
+| ---------------------- | ------------------------------------------------------------ |
+| Exchanges              | Real-time fee estimates, TxQ capacity, server health scores  |
+| Payment Processors     | Settlement latency percentiles, corridor health              |
+| Analytics Providers    | Validator metrics, network topology, amendment voting status |
+| DeFi / AMM             | AMM pool TVL, DEX order book depth, trade volumes            |
+| Validators / Operators | Per-peer latency, version distribution, UNL health, alerting |
+| Compliance             | Transaction volume trends, network growth metrics            |
+| Academic Researchers   | Consensus performance time-series, decentralization metrics  |
+| CBDC / Tokenization    | Token supply tracking, trust line adoption, freeze status    |
+| Institutional Custody  | Multi-sig status, escrow tracking, reserve calculations      |
+| Wallet Providers       | Server health for node selection, fee prediction data        |
+
+### Tasks
+
+| Task  | Description                           |
+| ----- | ------------------------------------- |
+| 11.1  | OTel Collector receiver scaffold (Go) |
+| 11.2  | server_info / server_state collector  |
+| 11.3  | get_counts collector                  |
+| 11.4  | Peer topology collector               |
+| 11.5  | Validator & amendment collector       |
+| 11.6  | Fee & TxQ collector                   |
+| 11.7  | DEX & AMM collector (optional)        |
+| 11.8  | Prometheus alerting rules             |
+| 11.9  | New Grafana dashboards (4)            |
+| 11.10 | Integration with Phase 10 validation  |
+| 11.11 | Documentation                         |
+
+See [Phase11_taskList.md](./Phase11_taskList.md) for detailed per-task breakdown.
+
+### Exit Criteria
+
+- [ ] Custom OTel Collector receiver exports all `xrpl_*` metrics to Prometheus
+- [ ] 4 new Grafana dashboards operational (Validator Health, Network Topology, Fee Market, DEX/AMM)
+- [ ] Prometheus alerting rules fire correctly for simulated failures
+- [ ] Receiver handles rippled restart/unavailability gracefully
+- [ ] Go receiver has unit tests with >80% coverage
+
+---
+
 ## 6.9 Risk Assessment
 
 ```mermaid
@@ -853,14 +1122,13 @@ quadrantChart
 
 ---
 
-
-## 6.13 Definition of Done
+## 6.12 Definition of Done
 
 > **TxQ** = Transaction Queue | **HA** = High Availability
 
 Clear, measurable criteria for each phase.
 
-### 6.13.1 Phase 1: Core Infrastructure
+### 6.12.1 Phase 1: Core Infrastructure
 
 
 | Criterion       | Measurement                                                | Target                       |
@@ -873,8 +1141,7 @@ Clear, measurable criteria for each phase.
 
 **Definition of Done**: All criteria met, PR merged, no regressions in CI.
 
-
-### 6.13.2 Phase 2: RPC Tracing
+### 6.12.2 Phase 2: RPC Tracing
 
 
 | Criterion          | Measurement                        | Target                     |
@@ -888,7 +1155,7 @@ Clear, measurable criteria for each phase.
 **Definition of Done**: RPC traces visible in Tempo for all commands, dashboard shows latency distribution.
 
 
-### 6.13.3 Phase 3: Transaction Tracing
+### 6.12.3 Phase 3: Transaction Tracing
 
 
 | Criterion        | Measurement                     | Target                             |
@@ -901,8 +1168,7 @@ Clear, measurable criteria for each phase.
 
 **Definition of Done**: Transaction traces span 3+ nodes in test network, performance within bounds.
 
-
-### 6.13.4 Phase 4: Consensus Tracing
+### 6.12.4 Phase 4: Consensus Tracing
 
 
 | Criterion            | Measurement                   | Target                    |
@@ -915,8 +1181,7 @@ Clear, measurable criteria for each phase.
 
 **Definition of Done**: Consensus rounds fully traceable, no impact on consensus timing.
 
-
-### 6.13.5 Phase 5: Production Deployment
+### 6.12.5 Phase 5: Production Deployment
 
 
 | Criterion    | Measurement                  | Target                     |
@@ -930,23 +1195,25 @@ Clear, measurable criteria for each phase.
 
 **Definition of Done**: Telemetry running in production, operators trained, alerts active.
 
+### 6.12.6 Success Metrics Summary
 
-### 6.13.6 Success Metrics Summary
-
-| Phase   | Primary Metric               | Secondary Metric            | Deadline       |
-| ------- | ---------------------------- | --------------------------- | -------------- |
-| Phase 1 | SDK compiles and runs        | Zero overhead when disabled | End of Week 2  |
-| Phase 2 | 100% RPC coverage            | <1ms latency overhead       | End of Week 4  |
-| Phase 3 | Cross-node traces work       | <5% throughput impact       | End of Week 6  |
-| Phase 4 | Consensus fully traced       | No consensus timing impact  | End of Week 8  |
-| Phase 5 | Production deployment        | Operators trained           | End of Week 9  |
-| Phase 6 | StatsD metrics in Prometheus | 3 dashboards operational    | End of Week 10 |
-| Phase 7 | All metrics via OTLP         | No StatsD dependency        | End of Week 12 |
-| Phase 8 | trace_id in logs + Loki      | Tempo↔Loki correlation      | End of Week 13 |
+| Phase    | Primary Metric                   | Secondary Metric            | Deadline       | Status             |
+| -------- | -------------------------------- | --------------------------- | -------------- | ------------------ |
+| Phase 1  | SDK compiles and runs            | Zero overhead when disabled | End of Week 2  | Active             |
+| Phase 2  | 100% RPC coverage                | <1ms latency overhead       | End of Week 4  | Active             |
+| Phase 3  | Cross-node traces work           | <5% throughput impact       | End of Week 6  | Active             |
+| Phase 4  | Consensus fully traced           | No consensus timing impact  | End of Week 8  | Active             |
+| Phase 5  | Production deployment            | Operators trained           | End of Week 9  | Active             |
+| Phase 6  | StatsD metrics in Prometheus     | 3 dashboards operational    | End of Week 10 | Active             |
+| Phase 7  | All metrics via OTLP             | No StatsD dependency        | End of Week 12 | Active             |
+| Phase 8  | trace_id in logs + Loki          | Tempo↔Loki correlation      | End of Week 13 | Active             |
+| Phase 9  | 50+ new internal metrics in Prom | 2 new dashboards            | End of Week 15 | Future Enhancement |
+| Phase 10 | Full telemetry stack validated   | < 3% CPU overhead proven    | End of Week 17 | Future Enhancement |
+| Phase 11 | Third-party metrics via receiver | 4 new dashboards + alerting | End of Week 20 | Future Enhancement |
 
 ---
 
-## 6.14 Recommended Implementation Order
+## 6.13 Recommended Implementation Order
 
 Based on ROI analysis, implement in this exact order:
 
diff --git a/OpenTelemetryPlan/08-appendix.md b/OpenTelemetryPlan/08-appendix.md
index 0b64b19067..b6e12fd318 100644
--- a/OpenTelemetryPlan/08-appendix.md
+++ b/OpenTelemetryPlan/08-appendix.md
@@ -43,6 +43,18 @@
 | **LoadManager**   | Dynamic fee escalation based on network load                  |
 | **SHAMap**        | SHA-256 hash-based map (Merkle trie variant) for ledger state |
 
+### Phase 9–11 Terms
+
+| Term                        | Definition                                                                |
+| --------------------------- | ------------------------------------------------------------------------- |
+| **MetricsRegistry**         | Centralized class for OTel async gauge registrations (Phase 9)            |
+| **ObservableGauge**         | OTel Metrics SDK async instrument polled via callback at fixed intervals  |
+| **PeriodicMetricReader**    | OTel SDK component that invokes gauge callbacks at configurable intervals |
+| **CountedObject**           | rippled template that tracks live instance counts via atomic counters     |
+| **TxQ**                     | Transaction queue managing fee escalation and ordering                    |
+| **Load Factor**             | Combined multiplier affecting transaction cost (local, cluster, network)  |
+| **OTel Collector Receiver** | Custom Go plugin that polls rippled RPC and emits OTel metrics (Phase 11) |
+
 ---
 
 ## 8.2 Span Hierarchy Visualization
@@ -162,7 +174,8 @@ flowchart TB
 | ------- | ---------- | ------ | -------------------------------------------------------------- |
 | 1.0     | 2026-02-12 | -      | Initial implementation plan                                    |
 | 1.1     | 2026-02-13 | -      | Refactored into modular documents                              |
-| 1.2     | 2026-03-24 | -      | Review fixes: accuracy corrections, cross-document consistency |
+| 1.2     | 2026-03-09 | -      | Added Phases 9–11 (future enhancement plans)                   |
+| 1.3     | 2026-03-24 | -      | Review fixes: accuracy corrections, cross-document consistency |
 
 ---
 
@@ -197,8 +210,57 @@ flowchart TB
 | [Phase5_IntegrationTest_taskList.md](./Phase5_IntegrationTest_taskList.md) | Observability stack integration tests               |
 | [Phase7_taskList.md](./Phase7_taskList.md)                                 | Native OTel metrics migration                       |
 | [Phase8_taskList.md](./Phase8_taskList.md)                                 | Log-trace correlation                               |
+| [Phase9_taskList.md](./Phase9_taskList.md)                                 | Internal metric instrumentation gap fill (future)   |
+| [Phase10_taskList.md](./Phase10_taskList.md)                               | Synthetic workload generation & validation (future) |
+| [Phase11_taskList.md](./Phase11_taskList.md)                               | Third-party data collection pipelines (future)      |
 | [presentation.md](./presentation.md)                                       | Presentation slides for OpenTelemetry plan overview |
 
+> **Note**: Phases 1 and 6 do not have separate task list files. Phase 1 tasks are documented in [06-implementation-phases.md §6.2](./06-implementation-phases.md). Phase 6 tasks are documented in [06-implementation-phases.md §6.7](./06-implementation-phases.md).
+
+---
+
+## 8.6 Phase 9–11 Cross-Reference Guide
+
+This guide maps Phase 9–11 content to its location across the documentation.
+
+### Phase 9: Internal Metric Instrumentation Gap Fill
+
+| Content                         | Location                                                                 |
+| ------------------------------- | ------------------------------------------------------------------------ |
+| Plan & architecture             | [06-implementation-phases.md §6.8.2](./06-implementation-phases.md)      |
+| Task list (10 tasks)            | [Phase9_taskList.md](./Phase9_taskList.md)                               |
+| Future metric definitions (~50) | [09-data-collection-reference.md §5b](./09-data-collection-reference.md) |
+| New class: `MetricsRegistry`    | `src/xrpld/telemetry/MetricsRegistry.h/.cpp` (planned)                   |
+| New dashboards                  | `rippled-fee-market`, `rippled-job-queue` (planned)                      |
+
+**Metric categories**: NodeStore I/O, Cache Hit Rates, TxQ, PerfLog Per-RPC, PerfLog Per-Job, Counted Objects, Fee Escalation & Load Factors.
+
+### Phase 10: Synthetic Workload Generation & Telemetry Validation
+
+| Content              | Location                                                                 |
+| -------------------- | ------------------------------------------------------------------------ |
+| Plan & architecture  | [06-implementation-phases.md §6.8.3](./06-implementation-phases.md)      |
+| Task list (7 tasks)  | [Phase10_taskList.md](./Phase10_taskList.md)                             |
+| Validation inventory | [09-data-collection-reference.md §5c](./09-data-collection-reference.md) |
+| Test harness         | `docker/telemetry/docker-compose.workload.yaml` (planned)                |
+| CI workflow          | `.github/workflows/telemetry-validation.yml` (planned)                   |
+
+**Validates**: 16 spans, 22 attributes, 300+ metrics, 10 dashboards, log-trace correlation.
+
+### Phase 11: Third-Party Data Collection Pipelines
+
+| Content                           | Location                                                                 |
+| --------------------------------- | ------------------------------------------------------------------------ |
+| Plan & architecture               | [06-implementation-phases.md §6.8.4](./06-implementation-phases.md)      |
+| Task list (11 tasks)              | [Phase11_taskList.md](./Phase11_taskList.md)                             |
+| External metric definitions (~30) | [09-data-collection-reference.md §5d](./09-data-collection-reference.md) |
+| Custom OTel Collector receiver    | `docker/telemetry/otel-rippled-receiver/` (planned)                      |
+| Prometheus alerting rules (11)    | [09-data-collection-reference.md §5d](./09-data-collection-reference.md) |
+| New dashboards (4)                | Validator Health, Network Topology, Fee Market (External), DEX & AMM     |
+
+**Consumer categories**: Exchanges, Payment Processors, DeFi/AMM, NFT Marketplaces, Analytics Providers, Wallets, Compliance, Academic Researchers, Institutional Custody, CBDC Bridge Operators.
+
+
 ---
 
 _Previous: [Observability Backends](./07-observability-backends.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
diff --git a/OpenTelemetryPlan/09-data-collection-reference.md b/OpenTelemetryPlan/09-data-collection-reference.md
index 0da5148d4c..e208c38e09 100644
--- a/OpenTelemetryPlan/09-data-collection-reference.md
+++ b/OpenTelemetryPlan/09-data-collection-reference.md
@@ -11,6 +11,7 @@ graph LR
     subgraph rippledNode["rippled Node"]
         A["Trace Macros<br/>XRPL_TRACE_SPAN<br/>(OTLP/HTTP exporter)"]
         B["beast::insight<br/>OTel native metrics<br/>(OTLP/HTTP exporter)"]
+        C["MetricsRegistry<br/>OTel SDK metrics<br/>(OTLP/HTTP exporter)"]
     end
 
     subgraph collector["OTel Collector  :4317 / :4318"]
@@ -32,11 +33,12 @@ graph LR
     end
 
     subgraph viz["Visualization"]
-        F["Grafana  :3000<br/>10 dashboards"]
+        F["Grafana  :3000<br/>13 dashboards"]
     end
 
     A -->|"OTLP/HTTP :4318<br/>(traces + attributes)"| R1
     B -->|"OTLP/HTTP :4318<br/>(gauges, counters, histograms)"| R1
+    C -->|"OTLP/HTTP :4318<br/>(counters, histograms,<br/>observable gauges)"| R1
 
     BP -->|"OTLP/gRPC :4317"| D
 
@@ -563,6 +565,337 @@ count_over_time({job="rippled"} |= "trace_id=" [5m])
 
 ---
 
+## 5b. Future: Internal Metric Gap Fill (Phase 9)
+
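+> **Status**: Original plan. The metrics as implemented by `MetricsRegistry`, with their final exported names, are listed under "Phase 9: OTel SDK-Exported Metrics (MetricsRegistry)" later in this document.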
+> **Plan details**: [06-implementation-phases.md §6.8.2](./06-implementation-phases.md) — motivation, architecture, third-party context
+> **Task breakdown**: [Phase9_taskList.md](./Phase9_taskList.md) — per-task implementation details
+
+Phase 9 adds time-series export for 50+ metrics that already exist inside rippled but are not currently exported. It uses a hybrid approach: `beast::insight` extensions for NodeStore I/O, and OTel `ObservableGauge` async callbacks for the new categories.
+
+### New Metric Categories
+
+#### NodeStore I/O (via beast::insight)
+
+| Prometheus Metric                    | Type  | Description                         |
+| ------------------------------------ | ----- | ----------------------------------- |
+| `rippled_nodestore_reads_total`      | Gauge | Cumulative read operations          |
+| `rippled_nodestore_reads_hit`        | Gauge | Cache-served reads                  |
+| `rippled_nodestore_writes`           | Gauge | Cumulative write operations         |
+| `rippled_nodestore_written_bytes`    | Gauge | Cumulative bytes written            |
+| `rippled_nodestore_read_bytes`       | Gauge | Cumulative bytes read               |
+| `rippled_nodestore_read_duration_us` | Gauge | Cumulative read time (microseconds) |
+| `rippled_nodestore_write_load`       | Gauge | Current write load score            |
+| `rippled_nodestore_read_queue`       | Gauge | Items in read queue                 |
+
+#### Cache Hit Rates (via OTel MetricsRegistry)
+
+| Prometheus Metric               | Type  | Description                          |
+| ------------------------------- | ----- | ------------------------------------ |
+| `rippled_cache_SLE_hit_rate`    | Gauge | SLE cache hit rate (0.0-1.0)         |
+| `rippled_cache_ledger_hit_rate` | Gauge | Ledger object cache hit rate         |
+| `rippled_cache_AL_hit_rate`     | Gauge | AcceptedLedger cache hit rate        |
+| `rippled_cache_treenode_size`   | Gauge | SHAMap TreeNode cache size (entries) |
+| `rippled_cache_fullbelow_size`  | Gauge | FullBelow cache size                 |
+
+#### Transaction Queue (via OTel MetricsRegistry)
+
+| Prometheus Metric                      | Type  | Description                      |
+| -------------------------------------- | ----- | -------------------------------- |
+| `rippled_txq_count`                    | Gauge | Current transactions in queue    |
+| `rippled_txq_max_size`                 | Gauge | Maximum queue capacity           |
+| `rippled_txq_in_ledger`                | Gauge | Transactions in open ledger      |
+| `rippled_txq_per_ledger`               | Gauge | Expected transactions per ledger |
+| `rippled_txq_open_ledger_fee_level`    | Gauge | Open ledger fee escalation level |
+| `rippled_txq_med_fee_level`            | Gauge | Median fee level in queue        |
+| `rippled_txq_reference_fee_level`      | Gauge | Reference fee level              |
+| `rippled_txq_min_processing_fee_level` | Gauge | Minimum fee to get processed     |
+
+#### PerfLog Per-RPC Method (via OTel Metrics SDK)
+
+| Prometheus Metric                       | Type      | Labels            | Description                 |
+| --------------------------------------- | --------- | ----------------- | --------------------------- |
+| `rippled_rpc_method_started_total`      | Counter   | `method="<name>"` | RPC calls started           |
+| `rippled_rpc_method_finished_total`     | Counter   | `method="<name>"` | RPC calls completed         |
+| `rippled_rpc_method_errored_total`      | Counter   | `method="<name>"` | RPC calls errored           |
+| `rippled_rpc_method_duration_us_bucket` | Histogram | `method="<name>"` | Execution time distribution |
+
+#### PerfLog Per-Job Type (via OTel Metrics SDK)
+
+| Prometheus Metric                        | Type      | Labels              | Description     |
+| ---------------------------------------- | --------- | ------------------- | --------------- |
+| `rippled_job_queued_total`               | Counter   | `job_type="<name>"` | Jobs queued     |
+| `rippled_job_started_total`              | Counter   | `job_type="<name>"` | Jobs started    |
+| `rippled_job_finished_total`             | Counter   | `job_type="<name>"` | Jobs completed  |
+| `rippled_job_queued_duration_us_bucket`  | Histogram | `job_type="<name>"` | Queue wait time |
+| `rippled_job_running_duration_us_bucket` | Histogram | `job_type="<name>"` | Execution time  |
+
+#### Counted Object Instances (via OTel MetricsRegistry)
+
+| Prometheus Metric      | Type  | Labels          | Description                     |
+| ---------------------- | ----- | --------------- | ------------------------------- |
+| `rippled_object_count` | Gauge | `type="<name>"` | Live instances of internal type |
+
+Tracked types: `Transaction`, `Ledger`, `NodeObject`, `STTx`, `STLedgerEntry`, `InboundLedger`, `Pathfinder`, `PathRequest`, `HashRouterEntry`
+
+#### Fee Escalation & Load Factors (via OTel MetricsRegistry)
+
+| Prometheus Metric                    | Type  | Description                          |
+| ------------------------------------ | ----- | ------------------------------------ |
+| `rippled_load_factor`                | Gauge | Combined transaction cost multiplier |
+| `rippled_load_factor_server`         | Gauge | Server + cluster + network load      |
+| `rippled_load_factor_local`          | Gauge | Local server load only               |
+| `rippled_load_factor_net`            | Gauge | Network-wide load estimate           |
+| `rippled_load_factor_cluster`        | Gauge | Cluster peer load                    |
+| `rippled_load_factor_fee_escalation` | Gauge | Open ledger fee escalation           |
+| `rippled_load_factor_fee_queue`      | Gauge | Queue entry fee level                |
+
+### New Grafana Dashboards (Phase 9)
+
+| Dashboard          | UID                  | Data Source | Key Panels                                                        |
+| ------------------ | -------------------- | ----------- | ----------------------------------------------------------------- |
+| Fee Market & TxQ   | `rippled-fee-market` | Prometheus  | TxQ depth/capacity, fee levels, load factor breakdown, escalation |
+| Job Queue Analysis | `rippled-job-queue`  | Prometheus  | Per-job rates, queue wait times, execution times, queue depth     |
+
+---
+
+## 5c. Future: Synthetic Workload Generation & Telemetry Validation (Phase 10)
+
+> **Status**: Planned, not yet implemented.
+> **Plan details**: [06-implementation-phases.md §6.8.3](./06-implementation-phases.md) — motivation, architecture
+> **Task breakdown**: [Phase10_taskList.md](./Phase10_taskList.md) — per-task implementation details
+
+Phase 10 builds a 5-node validator docker-compose harness with RPC load generators, transaction submitters, and automated validation scripts that verify all spans, metrics, dashboards, and log-trace correlation work end-to-end. Includes a benchmark suite comparing telemetry-ON vs telemetry-OFF overhead.
+
+### Validated Telemetry Inventory
+
+| Category           | Expected Count | Validation Method                |
+| ------------------ | -------------- | -------------------------------- |
+| Trace spans        | 16             | Jaeger/Tempo API query           |
+| Span attributes    | 22             | Per-span attribute assertion     |
+| StatsD metrics     | 255+           | Prometheus query                 |
+| Phase 9 metrics    | 50+            | Prometheus query                 |
+| SpanMetrics RED    | 4 per span     | Prometheus query                 |
+| Grafana dashboards | 13             | Dashboard API "no data" check    |
+| Log-trace links    | Present        | Loki query + Tempo reverse check |
+
+---
+
+## 5d. Future: Third-Party Data Collection Pipelines (Phase 11)
+
+> **Status**: Planned, not yet implemented.
+> **Plan details**: [06-implementation-phases.md §6.8.4](./06-implementation-phases.md) — motivation, architecture, consumer gap analysis
+> **Task breakdown**: [Phase11_taskList.md](./Phase11_taskList.md) — per-task implementation details
+
+Phase 11 builds a custom OTel Collector receiver (Go) that polls rippled's admin RPCs and exports `xrpl_*` metrics for external consumers. No rippled code changes are required.
+
+### Exported Metrics (via Custom OTel Collector Receiver)
+
+#### Node Health (from server_info)
+
+| Prometheus Metric                       | Type    | Description                                     |
+| --------------------------------------- | ------- | ----------------------------------------------- |
+| `xrpl_server_state`                     | Gauge   | Operating mode (0=disconnected ... 5=proposing) |
+| `xrpl_server_state_duration_seconds`    | Gauge   | Seconds in current state                        |
+| `xrpl_uptime_seconds`                   | Gauge   | Consecutive seconds running                     |
+| `xrpl_io_latency_ms`                    | Gauge   | I/O subsystem latency                           |
+| `xrpl_amendment_blocked`                | Gauge   | 1 if amendment-blocked, 0 otherwise             |
+| `xrpl_peers_count`                      | Gauge   | Connected peers                                 |
+| `xrpl_validated_ledger_seq`             | Gauge   | Latest validated ledger sequence                |
+| `xrpl_validated_ledger_age_seconds`     | Gauge   | Seconds since last validated close              |
+| `xrpl_last_close_proposers`             | Gauge   | Proposers in last consensus round               |
+| `xrpl_last_close_converge_time_seconds` | Gauge   | Last consensus round duration                   |
+| `xrpl_load_factor`                      | Gauge   | Transaction cost multiplier                     |
+| `xrpl_state_duration_seconds`           | Gauge   | Per-state duration (`state` label)              |
+| `xrpl_state_transitions_total`          | Counter | Per-state transition count (`state` label)      |
+
+#### Peer Topology (from peers)
+
+| Prometheus Metric           | Type  | Description                         |
+| --------------------------- | ----- | ----------------------------------- |
+| `xrpl_peers_inbound_count`  | Gauge | Inbound peer connections            |
+| `xrpl_peers_outbound_count` | Gauge | Outbound peer connections           |
+| `xrpl_peer_latency_p50_ms`  | Gauge | Median peer latency                 |
+| `xrpl_peer_latency_p95_ms`  | Gauge | p95 peer latency                    |
+| `xrpl_peer_version_count`   | Gauge | Peers per version (`version` label) |
+| `xrpl_peer_diverged_count`  | Gauge | Peers with diverged tracking status |
+
+#### Validator & Amendment (from validators, feature)
+
+| Prometheus Metric                     | Type  | Description                             |
+| ------------------------------------- | ----- | --------------------------------------- |
+| `xrpl_trusted_validators_count`       | Gauge | UNL validator count                     |
+| `xrpl_amendment_enabled_count`        | Gauge | Enabled amendments                      |
+| `xrpl_amendment_majority_count`       | Gauge | Amendments with majority                |
+| `xrpl_amendment_unsupported_majority` | Gauge | 1 if unsupported amendment has majority |
+| `xrpl_validator_list_active`          | Gauge | 1 if validator list is active           |
+
+#### Fee Market (from fee)
+
+| Prometheus Metric                | Type  | Description                           |
+| -------------------------------- | ----- | ------------------------------------- |
+| `xrpl_fee_open_ledger_fee_drops` | Gauge | Minimum fee for open ledger inclusion |
+| `xrpl_fee_median_fee_drops`      | Gauge | Median fee level                      |
+| `xrpl_fee_queue_size`            | Gauge | Current transaction queue depth       |
+| `xrpl_fee_current_ledger_size`   | Gauge | Transactions in current open ledger   |
+
+#### DEX & AMM (optional, from book_offers, amm_info)
+
+| Prometheus Metric          | Type  | Labels                | Description            |
+| -------------------------- | ----- | --------------------- | ---------------------- |
+| `xrpl_amm_tvl_drops`       | Gauge | `pool="<id>"`         | Total value locked     |
+| `xrpl_amm_trading_fee`     | Gauge | `pool="<id>"`         | Pool trading fee (bps) |
+| `xrpl_orderbook_bid_depth` | Gauge | `pair="<base/quote>"` | Total bid volume       |
+| `xrpl_orderbook_ask_depth` | Gauge | `pair="<base/quote>"` | Total ask volume       |
+| `xrpl_orderbook_spread`    | Gauge | `pair="<base/quote>"` | Best bid-ask spread    |
+
+### Phase 9: OTel SDK-Exported Metrics (MetricsRegistry)
+
+Phase 9 introduces the `MetricsRegistry` class (`src/xrpld/telemetry/MetricsRegistry.h/.cpp`)
+which registers metrics directly with the OpenTelemetry Metrics SDK. These are exported
+via OTLP/HTTP to the OTel Collector and scraped by Prometheus.
+
+#### NodeStore I/O (Observable Gauge — `nodestore_state`)
+
+| Prometheus Metric                                      | Type  | Labels   | Description                          |
+| ------------------------------------------------------ | ----- | -------- | ------------------------------------ |
+| `rippled_nodestore_state{metric="node_reads_total"}`   | Gauge | `metric` | Cumulative NodeStore read operations |
+| `rippled_nodestore_state{metric="node_reads_hit"}`     | Gauge | `metric` | Reads served from cache              |
+| `rippled_nodestore_state{metric="node_writes"}`        | Gauge | `metric` | Cumulative write operations          |
+| `rippled_nodestore_state{metric="node_written_bytes"}` | Gauge | `metric` | Cumulative bytes written             |
+| `rippled_nodestore_state{metric="node_read_bytes"}`    | Gauge | `metric` | Cumulative bytes read                |
+| `rippled_nodestore_state{metric="write_load"}`         | Gauge | `metric` | Current write load score             |
+| `rippled_nodestore_state{metric="read_queue"}`         | Gauge | `metric` | Items in read prefetch queue         |
+
+#### Cache Hit Rates & Sizes (Observable Gauge — `cache_metrics`)
+
+| Prometheus Metric                                     | Type  | Labels   | Description                   |
+| ----------------------------------------------------- | ----- | -------- | ----------------------------- |
+| `rippled_cache_metrics{metric="SLE_hit_rate"}`        | Gauge | `metric` | SLE cache hit rate (0.0-1.0)  |
+| `rippled_cache_metrics{metric="ledger_hit_rate"}`     | Gauge | `metric` | Ledger cache hit rate         |
+| `rippled_cache_metrics{metric="AL_hit_rate"}`         | Gauge | `metric` | AcceptedLedger cache hit rate |
+| `rippled_cache_metrics{metric="treenode_cache_size"}` | Gauge | `metric` | SHAMap TreeNode cache entries |
+| `rippled_cache_metrics{metric="treenode_track_size"}` | Gauge | `metric` | Tracked tree nodes            |
+| `rippled_cache_metrics{metric="fullbelow_size"}`      | Gauge | `metric` | FullBelow cache entries       |
+
+#### Transaction Queue (Observable Gauge — `txq_metrics`)
+
+| Prometheus Metric                                            | Type  | Labels   | Description                      |
+| ------------------------------------------------------------ | ----- | -------- | -------------------------------- |
+| `rippled_txq_metrics{metric="txq_count"}`                    | Gauge | `metric` | Transactions currently in queue  |
+| `rippled_txq_metrics{metric="txq_max_size"}`                 | Gauge | `metric` | Maximum queue capacity           |
+| `rippled_txq_metrics{metric="txq_in_ledger"}`                | Gauge | `metric` | Transactions in open ledger      |
+| `rippled_txq_metrics{metric="txq_per_ledger"}`               | Gauge | `metric` | Expected transactions per ledger |
+| `rippled_txq_metrics{metric="txq_reference_fee_level"}`      | Gauge | `metric` | Reference fee level              |
+| `rippled_txq_metrics{metric="txq_min_processing_fee_level"}` | Gauge | `metric` | Minimum fee to get processed     |
+| `rippled_txq_metrics{metric="txq_med_fee_level"}`            | Gauge | `metric` | Median fee level in queue        |
+| `rippled_txq_metrics{metric="txq_open_ledger_fee_level"}`    | Gauge | `metric` | Open ledger fee escalation level |
+
+#### Per-RPC Method Metrics (Synchronous Counters/Histogram)
+
+| Prometheus Metric                   | Type      | Labels            | Description                      |
+| ----------------------------------- | --------- | ----------------- | -------------------------------- |
+| `rippled_rpc_method_started_total`  | Counter   | `method="<name>"` | RPC calls started                |
+| `rippled_rpc_method_finished_total` | Counter   | `method="<name>"` | RPC calls completed successfully |
+| `rippled_rpc_method_errored_total`  | Counter   | `method="<name>"` | RPC calls that errored           |
+| `rippled_rpc_method_duration_us`    | Histogram | `method="<name>"` | Execution time distribution (us) |
+
+#### Per-Job-Type Metrics (Synchronous Counters/Histogram)
+
+| Prometheus Metric                 | Type      | Labels              | Description                       |
+| --------------------------------- | --------- | ------------------- | --------------------------------- |
+| `rippled_job_queued_total`        | Counter   | `job_type="<name>"` | Jobs enqueued                     |
+| `rippled_job_started_total`       | Counter   | `job_type="<name>"` | Jobs started                      |
+| `rippled_job_finished_total`      | Counter   | `job_type="<name>"` | Jobs completed                    |
+| `rippled_job_queued_duration_us`  | Histogram | `job_type="<name>"` | Queue wait time distribution (us) |
+| `rippled_job_running_duration_us` | Histogram | `job_type="<name>"` | Execution time distribution (us)  |
+
+#### Counted Object Instances (Observable Gauge — `object_count`)
+
+| Prometheus Metric                              | Type  | Labels          | Description                    |
+| ---------------------------------------------- | ----- | --------------- | ------------------------------ |
+| `rippled_object_count{type="Transaction"}`     | Gauge | `type="<name>"` | Live Transaction objects       |
+| `rippled_object_count{type="Ledger"}`          | Gauge | `type="<name>"` | Live Ledger objects            |
+| `rippled_object_count{type="NodeObject"}`      | Gauge | `type="<name>"` | Live NodeObject instances      |
+| `rippled_object_count{type="STTx"}`            | Gauge | `type="<name>"` | Serialized transaction objects |
+| `rippled_object_count{type="STLedgerEntry"}`   | Gauge | `type="<name>"` | Serialized ledger entries      |
+| `rippled_object_count{type="InboundLedger"}`   | Gauge | `type="<name>"` | Ledgers being fetched          |
+| `rippled_object_count{type="Pathfinder"}`      | Gauge | `type="<name>"` | Active pathfinding operations  |
+| `rippled_object_count{type="PathRequest"}`     | Gauge | `type="<name>"` | Active path requests           |
+| `rippled_object_count{type="HashRouterEntry"}` | Gauge | `type="<name>"` | Hash router entries            |
+
+#### Load Factor Breakdown (Observable Gauge — `load_factor_metrics`)
+
+| Prometheus Metric                                                  | Type  | Labels   | Description                             |
+| ------------------------------------------------------------------ | ----- | -------- | --------------------------------------- |
+| `rippled_load_factor_metrics{metric="load_factor"}`                | Gauge | `metric` | Combined transaction cost multiplier    |
+| `rippled_load_factor_metrics{metric="load_factor_server"}`         | Gauge | `metric` | Server + cluster + network contribution |
+| `rippled_load_factor_metrics{metric="load_factor_local"}`          | Gauge | `metric` | Local server load only                  |
+| `rippled_load_factor_metrics{metric="load_factor_net"}`            | Gauge | `metric` | Network-wide load estimate              |
+| `rippled_load_factor_metrics{metric="load_factor_cluster"}`        | Gauge | `metric` | Cluster peer load                       |
+| `rippled_load_factor_metrics{metric="load_factor_fee_escalation"}` | Gauge | `metric` | Open ledger fee escalation              |
+| `rippled_load_factor_metrics{metric="load_factor_fee_queue"}`      | Gauge | `metric` | Queue entry fee level                   |
+
+#### Prometheus Query Examples (Phase 9)
+
+```promql
+# NodeStore cache hit ratio
+rippled_nodestore_state{metric="node_reads_hit"} / rippled_nodestore_state{metric="node_reads_total"}
+
+# RPC error rate for server_info
+rate(rippled_rpc_method_errored_total{method="server_info"}[5m])
+
+# Job queue wait time p95
+histogram_quantile(0.95, sum by (le) (rate(rippled_job_queued_duration_us_bucket[5m])))
+
+# TxQ utilization percentage
+rippled_txq_metrics{metric="txq_count"} / rippled_txq_metrics{metric="txq_max_size"}
+
+# High load factor alert candidate
+rippled_load_factor_metrics{metric="load_factor"} > 5
+```
+
+### New Grafana Dashboards (Phase 9)
+
+| Dashboard              | UID                  | Data Source | Key Panels                                                |
+| ---------------------- | -------------------- | ----------- | --------------------------------------------------------- |
+| Fee Market & TxQ       | `rippled-fee-market` | Prometheus  | TxQ depth/capacity, fee levels, load factor breakdown     |
+| Job Queue Analysis     | `rippled-job-queue`  | Prometheus  | Per-job rates, queue wait times, execution times          |
+| RPC Performance (OTel) | `rippled-rpc-perf`   | Prometheus  | Per-method call rates, error rates, latency distributions |
+
+### Updated Grafana Dashboards (Phase 9)
+
+| Dashboard            | UID                          | New Panels Added                                       |
+| -------------------- | ---------------------------- | ------------------------------------------------------ |
+| Node Health (StatsD) | `rippled-statsd-node-health` | NodeStore I/O, cache hit rates, object instance counts |
+
+### New Grafana Dashboards (Phase 11)
+
+| Dashboard          | UID                           | Data Source | Key Panels                                                             |
+| ------------------ | ----------------------------- | ----------- | ---------------------------------------------------------------------- |
+| Validator Health   | `rippled-validator-health`    | Prometheus  | Server state timeline, proposer count, converge time, amendment voting |
+| Network Topology   | `rippled-network-topology`    | Prometheus  | Peer count, version distribution, latency distribution, diverged peers |
+| Fee Market (Ext)   | `rippled-fee-market-external` | Prometheus  | Fee levels, queue depth, load factor breakdown, escalation timeline    |
+| DEX & AMM Overview | `rippled-dex-amm`             | Prometheus  | AMM TVL, order book depth, spread trends, trading fee revenue          |
+
+### Prometheus Alerting Rules (Phase 11)
+
+| Alert Name                         | Severity | Condition                                                   | For |
+| ---------------------------------- | -------- | ----------------------------------------------------------- | --- |
+| `XRPLServerNotFull`                | Critical | `xrpl_server_state < 4`                                     | 15m |
+| `XRPLAmendmentBlocked`             | Critical | `xrpl_amendment_blocked == 1`                               | 1m  |
+| `XRPLNoPeers`                      | Critical | `xrpl_peers_count == 0`                                     | 5m  |
+| `XRPLLedgerStale`                  | Critical | `xrpl_validated_ledger_age_seconds > 120`                   | 2m  |
+| `XRPLHighIOLatency`                | Critical | `xrpl_io_latency_ms > 100`                                  | 5m  |
+| `XRPLUnsupportedAmendmentMajority` | Critical | `xrpl_amendment_unsupported_majority == 1`                  | 1m  |
+| `XRPLLowPeerCount`                 | Warning  | `xrpl_peers_count < 10`                                     | 15m |
+| `XRPLHighLoadFactor`               | Warning  | `xrpl_load_factor > 10`                                     | 10m |
+| `XRPLSlowConsensus`                | Warning  | `xrpl_last_close_converge_time_seconds > 6`                 | 5m  |
+| `XRPLValidatorListExpiring`        | Warning  | `(xrpl_validator_list_expiration_seconds - time()) < 86400` | 1h  |
+| `XRPLStateFlapping`                | Warning  | `rate(xrpl_state_transitions_total{state="full"}[1h]) > 2`  | 30m |
+
+---
+
 ## 6. Known Issues
 
 | Issue                                                              | Impact                                           | Status                                                               |
diff --git a/OpenTelemetryPlan/Phase10_taskList.md b/OpenTelemetryPlan/Phase10_taskList.md
new file mode 100644
index 0000000000..80a3603ffc
--- /dev/null
+++ b/OpenTelemetryPlan/Phase10_taskList.md
@@ -0,0 +1,242 @@
+# Phase 10: Synthetic Workload Generation & Telemetry Validation — Task List
+
+> **Status**: Future Enhancement
+>
+> **Goal**: Build tools that generate realistic XRPL traffic to validate the full Phases 1-9 telemetry stack end-to-end — all spans, attributes, metrics, dashboards, and log-trace correlation — under controlled load.
+>
+> **Scope**: Python/shell test harness + multi-node docker-compose environment + automated validation scripts + performance benchmarks.
+>
+> **Branch**: `pratik/otel-phase10-workload-validation` (from `pratik/otel-phase9-metric-gap-fill`)
+>
+> **Depends on**: Phase 9 (internal metric gap fill) — validates the full metric surface
+
+### Related Plan Documents
+
+| Document                                                             | Relevance                                                       |
+| -------------------------------------------------------------------- | --------------------------------------------------------------- |
+| [06-implementation-phases.md](./06-implementation-phases.md)         | Phase 10 plan: motivation, architecture, exit criteria (§6.8.3) |
+| [09-data-collection-reference.md](./09-data-collection-reference.md) | Defines the full inventory of spans/metrics to validate         |
+| [Phase9_taskList.md](./Phase9_taskList.md)                           | Prerequisite — all internal metrics must be emitting            |
+
+### Why This Phase Exists
+
+Before Phases 1-9 can be considered production-ready, we need proof that:
+
+1. All 16 spans fire with correct attributes under real transaction workloads
+2. All 255+ StatsD metrics + ~50 Phase 9 metrics appear in Prometheus with non-zero values
+3. Log-trace correlation (Phase 8) produces clickable trace_id links in Loki
+4. All 13 Grafana dashboards render meaningful data (no empty panels)
+5. Performance overhead stays within bounds (< 3% CPU, < 5MB memory)
+6. The telemetry stack survives sustained load without data loss or queue backpressure
+
+---
+
+## Task 10.1: Multi-Node Test Harness
+
+**Objective**: Create a docker-compose environment with 5 validator nodes that produces real consensus rounds.
+
+**What to do**:
+
+- Create `docker/telemetry/docker-compose.workload.yaml`:
+  - 5 rippled validator nodes with UNL configured for each other
+  - All telemetry enabled: `[telemetry] enabled=1`, `[insight] server=otel`
+  - Full OTel stack: Collector, Jaeger, Tempo, Prometheus, Loki, Grafana
+  - Shared network with service discovery
+
+- Each node should:
+  - Generate validator keys at startup
+  - Configure all 5 nodes in its UNL
+  - Enable all trace categories including `trace_peer=1`
+  - Write logs to a file tailed by the OTel Collector filelog receiver
+
+- Include a `Makefile` target: `make telemetry-workload-up` / `make telemetry-workload-down`
+
+**Key files**:
+
+- New: `docker/telemetry/docker-compose.workload.yaml`
+- New: `docker/telemetry/workload/generate-validator-keys.sh`
+- New: `docker/telemetry/workload/xrpld-validator.cfg.template`
+
+---
+
+## Task 10.2: RPC Load Generator
+
+**Objective**: Configurable tool that fires all traced RPC commands at controlled rates.
+
+**What to do**:
+
+- Create `docker/telemetry/workload/rpc_load_generator.py`:
+  - Connects to one or more rippled WebSocket endpoints
+  - Fires all RPC commands that have trace spans: `server_info`, `ledger`, `tx`, `account_info`, `account_lines`, `fee`, `submit`, etc.
+  - Configurable parameters: rate (RPS), duration, command distribution weights
+  - Injects `traceparent` HTTP headers to test W3C context propagation
+  - Logs progress and errors to stdout
+
+- Command distribution should match realistic production ratios:
+  - 40% `server_info` / `fee` (health checks)
+  - 30% `account_info` / `account_lines` / `account_objects` (wallet queries)
+  - 15% `ledger` / `ledger_data` (explorer queries)
+  - 10% `tx` / `account_tx` (transaction lookups)
+  - 5% `book_offers` / `amm_info` (DEX queries)
+
+**Key files**:
+
+- New: `docker/telemetry/workload/rpc_load_generator.py`
+- New: `docker/telemetry/workload/requirements.txt`
+
+---
+
+## Task 10.3: Transaction Submitter
+
+**Objective**: Generate diverse transaction types to exercise `tx.*` and `ledger.*` spans.
+
+**What to do**:
+
+- Create `docker/telemetry/workload/tx_submitter.py`:
+  - Pre-funds test accounts from genesis account
+  - Submits a mix of transaction types:
+    - `Payment` (XRP and issued currencies) — exercises `tx.process`, `tx.apply`
+    - `OfferCreate` / `OfferCancel` — DEX activity
+    - `TrustSet` — trust line creation for issued currencies
+    - `NFTokenMint` / `NFTokenCreateOffer` / `NFTokenAcceptOffer` — NFT activity
+    - `EscrowCreate` / `EscrowFinish` — escrow lifecycle
+    - `AMMCreate` / `AMMDeposit` / `AMMWithdraw` — AMM pool operations (if amendment enabled)
+  - Configurable: TPS target, transaction mix weights, duration
+  - Monitors submission results and tracks success/failure rates
+
+- The transaction mix ensures the telemetry captures the full range of ledger activity that third parties care about.
+
+**Key files**:
+
+- New: `docker/telemetry/workload/tx_submitter.py`
+- New: `docker/telemetry/workload/test_accounts.json` (pre-generated keypairs)
+
+---
+
+## Task 10.4: Telemetry Validation Suite
+
+**Objective**: Automated scripts that verify all expected telemetry data exists after a workload run.
+
+**What to do**:
+
+- Create `docker/telemetry/workload/validate_telemetry.py`:
+
+  **Span validation** (queries Jaeger/Tempo API):
+  - Assert all 16 span names appear in traces
+  - Assert each span has its required attributes (22 total attributes across spans)
+  - Assert parent-child relationships are correct (`rpc.request` → `rpc.process` → `rpc.command.*`)
+  - Assert span durations are reasonable (> 0, < 60s)
+
+  **Metric validation** (queries Prometheus API):
+  - Assert all SpanMetrics-derived metrics are non-zero: `traces_span_metrics_calls_total`, `traces_span_metrics_duration_milliseconds_bucket`
+  - Assert all StatsD metrics are non-zero: `rippled_LedgerMaster_Validated_Ledger_Age`, `rippled_Peer_Finder_Active_*`, etc.
+  - Assert all Phase 9 metrics are non-zero: `rippled_nodestore_*`, `rippled_cache_*`, `rippled_txq_*`, `rippled_rpc_method_*`, `rippled_job_*`, `rippled_object_count`, `rippled_load_factor*`
+  - Assert metric label cardinality is within bounds
+
+  **Log-trace correlation validation** (queries Loki API):
+  - Assert logs contain `trace_id=` and `span_id=` fields
+  - Pick a random trace_id from Jaeger → query Loki for matching logs → assert results exist
+  - Assert Grafana derived field links are functional
+
+  **Dashboard validation**:
+  - For each of the 13 Grafana dashboards, query the dashboard API and assert no panels show "No data"
+
+- Output: JSON report with pass/fail per check, suitable for CI.
+
+**Key files**:
+
+- New: `docker/telemetry/workload/validate_telemetry.py`
+- New: `docker/telemetry/workload/expected_spans.json` (span inventory for validation)
+- New: `docker/telemetry/workload/expected_metrics.json` (metric inventory for validation)
+
+---
+
+## Task 10.5: Performance Benchmark Suite
+
+**Objective**: Measure CPU/memory/latency overhead of the telemetry stack.
+
+**What to do**:
+
+- Create `docker/telemetry/workload/benchmark.sh`:
+  - **Baseline run**: Start cluster with `[telemetry] enabled=0`, run transaction workload for 5 minutes, record metrics
+  - **Telemetry run**: Start cluster with full telemetry enabled, run identical workload, record metrics
+  - **Comparison**: Calculate deltas for:
+    - CPU usage (per-node average)
+    - Memory RSS (per-node peak)
+    - RPC p99 latency
+    - Transaction throughput (TPS)
+    - Consensus round time p95
+    - Ledger close time p95
+
+- Output: Markdown table comparing baseline vs. telemetry, with pass/fail against targets:
+  - CPU overhead < 3%
+  - Memory overhead < 5MB
+  - RPC latency impact < 2ms p99
+  - Throughput impact < 5%
+  - Consensus impact < 1%
+
+- Store results in `docker/telemetry/workload/benchmark-results/` for historical tracking.
+
+**Key files**:
+
+- New: `docker/telemetry/workload/benchmark.sh`
+- New: `docker/telemetry/workload/collect_system_metrics.sh`
+
+---
+
+## Task 10.6: CI Integration
+
+**Objective**: Wire the validation suite into CI for regression detection.
+
+**What to do**:
+
+- Create a CI workflow (GitHub Actions or equivalent) that:
+  1. Builds rippled with `-DXRPL_ENABLE_TELEMETRY=ON`
+  2. Starts the multi-node workload harness
+  3. Runs the RPC load generator + transaction submitter for 2 minutes
+  4. Runs the validation suite
+  5. Runs the benchmark suite
+  6. Fails the build if any validation check fails or benchmark exceeds thresholds
+  7. Archives the validation report and benchmark results as artifacts
+
+- This should be a separate workflow (not part of the main CI), triggered manually or on telemetry-related branch changes.
+
+**Key files**:
+
+- New: `.github/workflows/telemetry-validation.yml`
+- New: `docker/telemetry/workload/run-full-validation.sh` (orchestrator script)
+
+---
+
+## Task 10.7: Documentation
+
+**Objective**: Document the workload tools and validation process.
+
+**What to do**:
+
+- Create `docker/telemetry/workload/README.md`:
+  - Quick start guide for running workload harness
+  - Configuration options for load generator and tx submitter
+  - How to read validation reports
+  - How to run benchmarks and interpret results
+
+- Update `docs/telemetry-runbook.md`:
+  - Add "Validating Telemetry Stack" section
+  - Add "Performance Benchmarking" section
+
+- Update `OpenTelemetryPlan/09-data-collection-reference.md`:
+  - Add "Validation" section with expected metric/span counts
+
+---
+
+## Exit Criteria
+
+- [ ] 5-node validator cluster starts and reaches consensus in docker-compose
+- [ ] RPC load generator fires all traced RPC commands at configurable rates
+- [ ] Transaction submitter generates 6+ transaction types at configurable TPS
+- [ ] Validation suite confirms all 16 spans, 22 attributes, 300+ metrics are present
+- [ ] Log-trace correlation validated end-to-end (Loki ↔ Tempo)
+- [ ] All 13 Grafana dashboards render data (no empty panels)
+- [ ] Benchmark shows < 3% CPU overhead, < 5MB memory overhead
+- [ ] CI workflow runs validation on telemetry branch changes
+- [ ] Validation report output is CI-parseable (JSON with exit codes)
diff --git a/OpenTelemetryPlan/Phase11_taskList.md b/OpenTelemetryPlan/Phase11_taskList.md
new file mode 100644
index 0000000000..7743950cda
--- /dev/null
+++ b/OpenTelemetryPlan/Phase11_taskList.md
@@ -0,0 +1,453 @@
+# Phase 11: Third-Party Data Collection Pipelines — Task List
+
+> **Status**: Future Enhancement
+>
+> **Goal**: Build a custom OTel Collector receiver that periodically polls rippled's admin RPCs and exports structured metrics for external consumers — making all XRPL health, validator, peer, fee, and DEX data available as Prometheus/OTLP metrics without rippled code changes.
+>
+> **Scope**: Go-based OTel Collector receiver plugin + Grafana dashboards + Prometheus alerting rules.
+>
+> **Branch**: `pratik/otel-phase11-third-party-collection` (from `pratik/otel-phase10-workload-validation`)
+>
+> **Depends on**: Phase 10 (validation harness for testing the new receiver)
+
+### Related Plan Documents
+
+| Document                                                             | Relevance                                                       |
+| -------------------------------------------------------------------- | --------------------------------------------------------------- |
+| [06-implementation-phases.md](./06-implementation-phases.md)         | Phase 11 plan: motivation, architecture, exit criteria (§6.8.4) |
+| [09-data-collection-reference.md](./09-data-collection-reference.md) | Defines full metric inventory including third-party metrics     |
+| [Phase10_taskList.md](./Phase10_taskList.md)                         | Prerequisite — validation harness for testing                   |
+
+### Third-Party Consumer Gap Analysis
+
+This phase addresses the cross-cutting gap identified during research: **rippled has no native Prometheus/OTLP metrics export for data accessible only via RPC**. Every consumer (exchanges, payment processors, analytics providers, validators, researchers, compliance firms, custodians) must build custom JSON-RPC polling and conversion. This receiver centralizes that work.
+
+| Consumer Category          | Data Unlocked by This Phase                                        |
+| -------------------------- | ------------------------------------------------------------------ |
+| **Exchanges**              | Real-time fee estimates, TxQ capacity, server health scores        |
+| **Payment Processors**     | Settlement latency percentiles, corridor health, path availability |
+| **Analytics Providers**    | Validator metrics, network topology, amendment voting status       |
+| **DeFi / AMM**             | AMM pool TVL, DEX order book depth, trade volumes                  |
+| **Validators / Operators** | Per-peer latency, version distribution, UNL health, alerting       |
+| **Compliance**             | Transaction volume trends, network growth metrics                  |
+| **Academic Researchers**   | Consensus performance time-series, decentralization metrics        |
+| **CBDC / Tokenization**    | Token supply tracking, trust line adoption, freeze status          |
+| **Institutional Custody**  | Multi-sig status, escrow tracking, reserve calculations            |
+| **Wallet Providers**       | Server health for node selection, fee prediction data              |
+
+---
+
+## Task 11.1: OTel Collector Receiver Scaffold
+
+**Objective**: Create the Go project structure for a custom OTel Collector receiver that polls rippled JSON-RPC.
+
+**What to do**:
+
+- Create `docker/telemetry/otel-rippled-receiver/`:
+  - `receiver.go` — implements `receiver.Metrics` interface
+  - `config.go` — configuration struct (endpoint, poll interval, enabled RPCs)
+  - `factory.go` — receiver factory registration
+  - `go.mod` / `go.sum` — Go module with OTel Collector SDK dependency
+
+- Configuration model:
+
+  ```yaml
+  rippled_receiver:
+    endpoint: "http://localhost:5005" # rippled admin RPC
+    poll_interval: 30s # how often to poll
+    enabled_collectors:
+      - server_info
+      - get_counts
+      - fee
+      - peers
+      - validators
+      - feature
+      - server_state
+    amm_pools: [] # optional: AMM pool IDs to track
+    book_offers_pairs: [] # optional: currency pairs for DEX depth
+  ```
+
+- Build a custom OTel Collector binary that includes this receiver alongside the standard receivers.
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/receiver.go`
+- New: `docker/telemetry/otel-rippled-receiver/config.go`
+- New: `docker/telemetry/otel-rippled-receiver/factory.go`
+- New: `docker/telemetry/otel-rippled-receiver/go.mod`
+- New: `docker/telemetry/otel-rippled-receiver/Dockerfile`
+
+---
+
+## Task 11.2: server_info / server_state Collector
+
+**Objective**: Poll `server_info` and `server_state` and export all fields as OTel metrics.
+
+**What to do**:
+
+- Implement `serverInfoCollector` that calls `server_info` (admin) and extracts:
+
+  **Node Health Gauges:**
+  - `xrpl_server_state` (enum → int: disconnected=0, connected=1, syncing=2, tracking=3, full=4, validating=5, proposing=6)
+  - `xrpl_server_state_duration_seconds`
+  - `xrpl_uptime_seconds`
+  - `xrpl_io_latency_ms`
+  - `xrpl_amendment_blocked` (0 or 1)
+  - `xrpl_peers_count`
+  - `xrpl_peer_disconnects_total`
+  - `xrpl_peer_disconnects_resources_total`
+  - `xrpl_jq_trans_overflow_total`
+
+  **Consensus Gauges:**
+  - `xrpl_last_close_proposers`
+  - `xrpl_last_close_converge_time_seconds`
+  - `xrpl_validation_quorum`
+
+  **Ledger Gauges:**
+  - `xrpl_validated_ledger_seq`
+  - `xrpl_validated_ledger_age_seconds`
+  - `xrpl_validated_ledger_base_fee_drops`
+  - `xrpl_validated_ledger_reserve_base_drops`
+  - `xrpl_validated_ledger_reserve_inc_drops`
+  - `xrpl_close_time_offset_seconds` (0 when absent)
+
+  **Load Factor Gauges:**
+  - `xrpl_load_factor`
+  - `xrpl_load_factor_server`
+  - `xrpl_load_factor_fee_escalation`
+  - `xrpl_load_factor_fee_queue`
+  - `xrpl_load_factor_local`
+  - `xrpl_load_factor_net`
+  - `xrpl_load_factor_cluster`
+
+  **State Accounting Gauges** (per state: disconnected, connected, syncing, tracking, full):
+  - `xrpl_state_duration_seconds{state="<name>"}`
+  - `xrpl_state_transitions_total{state="<name>"}`
+
+  **Validator Info** (when node is a validator):
+  - `xrpl_validator_list_count`
+  - `xrpl_validator_list_expiration_seconds` (epoch)
+  - `xrpl_validator_list_active` (0 or 1)
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/collectors/server_info.go`
+
+---
+
+## Task 11.3: get_counts Collector
+
+**Objective**: Poll `get_counts` and export internal object counts and NodeStore stats.
+
+**What to do**:
+
+- Implement `getCountsCollector`:
+
+  **Database Gauges:**
+  - `xrpl_db_size_kb{db="total"}`, `xrpl_db_size_kb{db="ledger"}`, `xrpl_db_size_kb{db="transaction"}`
+
+  **NodeStore Gauges:**
+  - `xrpl_nodestore_reads_total`, `xrpl_nodestore_reads_hit`, `xrpl_nodestore_writes_total`
+  - `xrpl_nodestore_read_bytes`, `xrpl_nodestore_written_bytes`
+  - `xrpl_nodestore_read_duration_us`, `xrpl_nodestore_write_load`
+  - `xrpl_nodestore_read_queue`, `xrpl_nodestore_read_threads_running`
+
+  **Cache Gauges:**
+  - `xrpl_cache_hit_rate{cache="SLE"}`, `xrpl_cache_hit_rate{cache="ledger"}`, `xrpl_cache_hit_rate{cache="accepted_ledger"}`
+  - `xrpl_cache_size{cache="treenode"}`, `xrpl_cache_size{cache="fullbelow"}`, `xrpl_cache_size{cache="accepted_ledger"}`
+
+  **Object Count Gauges:**
+  - `xrpl_object_count{type="<name>"}` for each counted object type (Transaction, Ledger, NodeObject, STTx, STLedgerEntry, InboundLedger, Pathfinder, etc.)
+
+  **Rates:**
+  - `xrpl_historical_fetch_per_minute`
+  - `xrpl_local_txs`
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/collectors/get_counts.go`
+
+---
+
+## Task 11.4: Peer Topology Collector
+
+**Objective**: Poll `peers` and export per-peer and aggregate network metrics.
+
+**What to do**:
+
+- Implement `peersCollector`:
+
+  **Aggregate Gauges:**
+  - `xrpl_peers_inbound_count`
+  - `xrpl_peers_outbound_count`
+  - `xrpl_peers_cluster_count`
+
+  **Per-Peer Gauges** (the `peer` label is the peer's public key truncated to 8 chars to keep label values short):
+  - `xrpl_peer_latency_ms{peer="<key>", version="<ver>", inbound="<bool>"}`
+  - `xrpl_peer_uptime_seconds{peer="<key>"}`
+  - `xrpl_peer_load{peer="<key>"}`
+
+  **Distribution Gauges** (aggregated across all peers):
+  - `xrpl_peer_latency_p50_ms`, `xrpl_peer_latency_p95_ms`, `xrpl_peer_latency_p99_ms`
+  - `xrpl_peer_version_count{version="<semver>"}` — count of peers per software version
+
+  **Tracking Status:**
+  - `xrpl_peer_diverged_count` — peers with `track=diverged`
+  - `xrpl_peer_unknown_count` — peers with `track=unknown`
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/collectors/peers.go`
+
+**Cardinality note**: Per-peer metrics use truncated keys. For large peer sets (50+), the aggregate distribution gauges are preferred over per-peer labels.
+
+---
+
+## Task 11.5: Validator & Amendment Collector
+
+**Objective**: Poll `validators` and `feature` to export validator health and amendment voting status.
+
+**What to do**:
+
+- Implement `validatorCollector`:
+
+  **From `validators` RPC:**
+  - `xrpl_trusted_validators_count`
+  - `xrpl_validator_signing` (0 or 1 — whether local validator is signing)
+
+  **From `feature` RPC:**
+  - `xrpl_amendment_enabled_count` — total enabled amendments
+  - `xrpl_amendment_majority_count` — amendments with majority but not yet enabled
+  - `xrpl_amendment_vetoed_count` — locally vetoed amendments
+  - `xrpl_amendment_unsupported_majority` (0 or 1) — any unsupported amendment has majority (critical alert)
+
+  **Per-amendment with majority** (limited cardinality — only amendments with `majority` set):
+  - `xrpl_amendment_majority_time{name="<amendment>"}` — epoch time when majority was gained
+  - `xrpl_amendment_votes{name="<amendment>"}` — current vote count
+  - `xrpl_amendment_threshold{name="<amendment>"}` — votes needed
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/collectors/validators.go`
+
+---
+
+## Task 11.6: Fee & TxQ Collector
+
+**Objective**: Poll `fee` RPC and export real-time fee market data.
+
+**What to do**:
+
+- Implement `feeCollector` that calls the public `fee` RPC:
+
+  **Fee Level Gauges:**
+  - `xrpl_fee_current_ledger_size` — transactions in current open ledger
+  - `xrpl_fee_expected_ledger_size` — expected transactions at close
+  - `xrpl_fee_max_queue_size` — maximum transaction queue size
+  - `xrpl_fee_open_ledger_fee_drops` — minimum fee for open ledger inclusion
+  - `xrpl_fee_median_fee_drops` — median fee level
+  - `xrpl_fee_minimum_fee_drops` — base reference fee
+  - `xrpl_fee_queue_size` — current queue depth
+
+- This overlaps with Phase 9's internal TxQ metrics but provides an external-only collection path that doesn't require rippled code changes.
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/collectors/fee.go`
+
+---
+
+## Task 11.7: DEX & AMM Collector (Optional)
+
+**Objective**: Periodically poll configured AMM pools and order book pairs for DeFi metrics.
+
+**What to do**:
+
+- Implement `dexCollector` (enabled only when `amm_pools` or `book_offers_pairs` are configured):
+
+  **AMM Pool Gauges** (per configured pool):
+  - `xrpl_amm_reserve{pool="<id>", asset="<currency>"}` — pool reserve amount
+  - `xrpl_amm_lp_token_supply{pool="<id>"}` — outstanding LP tokens
+  - `xrpl_amm_trading_fee{pool="<id>"}` — pool trading fee (units of 1/100,000; 1000 = 1%)
+  - `xrpl_amm_tvl_drops{pool="<id>"}` — total value locked (XRP-denominated)
+
+  **Order Book Gauges** (per configured pair):
+  - `xrpl_orderbook_bid_depth{pair="<base>/<quote>"}` — total bid volume
+  - `xrpl_orderbook_ask_depth{pair="<base>/<quote>"}` — total ask volume
+  - `xrpl_orderbook_spread{pair="<base>/<quote>"}` — best bid-ask spread
+  - `xrpl_orderbook_offer_count{pair="<base>/<quote>", side="bid|ask"}` — number of offers
+
+**Key files**:
+
+- New: `docker/telemetry/otel-rippled-receiver/collectors/dex.go`
+
+**Note**: This is optional because it requires explicit configuration of which pools/pairs to track. Default configuration tracks no DEX data.
+
+---
+
+## Task 11.8: Prometheus Alerting Rules
+
+**Objective**: Create production-ready alerting rules for the metrics exported by this receiver.
+
+**What to do**:
+
+- Create `docker/telemetry/prometheus/rippled-alerts.yml`:
+
+  **Tier 1 — Critical (page immediately):**
+
+  ```yaml
+  - alert: XRPLServerNotFull
+    expr: xrpl_server_state < 4
+    for: 15m
+
+  - alert: XRPLAmendmentBlocked
+    expr: xrpl_amendment_blocked == 1
+    for: 1m
+
+  - alert: XRPLNoPeers
+    expr: xrpl_peers_count == 0
+    for: 5m
+
+  - alert: XRPLLedgerStale
+    expr: xrpl_validated_ledger_age_seconds > 120
+    for: 2m
+
+  - alert: XRPLHighIOLatency
+    expr: xrpl_io_latency_ms > 100
+    for: 5m
+
+  - alert: XRPLUnsupportedAmendmentMajority
+    expr: xrpl_amendment_unsupported_majority == 1
+    for: 1m
+  ```
+
+  **Tier 2 — Warning (investigate within hours):**
+
+  ```yaml
+  - alert: XRPLLowPeerCount
+    expr: xrpl_peers_count < 10
+    for: 15m
+
+  - alert: XRPLHighLoadFactor
+    expr: xrpl_load_factor > 10
+    for: 10m
+
+  - alert: XRPLSlowConsensus
+    expr: xrpl_last_close_converge_time_seconds > 6
+    for: 5m
+
+  - alert: XRPLValidatorListExpiring
+    expr: (xrpl_validator_list_expiration_seconds - time()) < 86400
+    for: 1h
+
+  - alert: XRPLClockDrift
+    expr: xrpl_close_time_offset_seconds > 0
+    for: 5m
+
+  - alert: XRPLStateFlapping
+    expr: increase(xrpl_state_transitions_total{state="full"}[1h]) > 2
+    for: 30m
+  ```
+
+**Key files**:
+
+- New: `docker/telemetry/prometheus/rippled-alerts.yml`
+- Update: `docker/telemetry/prometheus/prometheus.yml` (add rule_files reference)
+
+---
+
+## Task 11.9: New Grafana Dashboards
+
+**Objective**: Create 4 new dashboards for the data exported by the receiver.
+
+**What to do**:
+
+- **Validator Health** (`rippled-validator-health`):
+  - Server state timeline, state duration breakdown
+  - Proposer count trend, converge time trend, validation quorum
+  - Validator list expiration countdown
+  - Amendment voting status (majority/enabled/vetoed)
+
+- **Network Topology** (`rippled-network-topology`):
+  - Peer count (inbound/outbound/cluster), peer version distribution
+  - Peer latency distribution (p50/p95/p99), diverged peer count
+  - Geographic distribution (if enriched with GeoIP)
+  - Peer uptime distribution
+
+- **Fee Market** (`rippled-fee-market-external`):
+  - Current fee levels (open ledger, median, minimum), fee escalation timeline
+  - Queue depth vs. capacity, transactions per ledger
+  - Load factor breakdown (server/network/cluster/escalation)
+
+- **DEX & AMM Overview** (`rippled-dex-amm`) (only populated when DEX collectors are configured):
+  - AMM pool TVL, reserve ratios, LP token supply
+  - Order book depth per pair, spread trends
+  - Trading fee revenue estimates
+
+**Key files**:
+
+- New: `docker/telemetry/grafana/dashboards/rippled-validator-health.json`
+- New: `docker/telemetry/grafana/dashboards/rippled-network-topology.json`
+- New: `docker/telemetry/grafana/dashboards/rippled-fee-market-external.json`
+- New: `docker/telemetry/grafana/dashboards/rippled-dex-amm.json`
+
+---
+
+## Task 11.10: Integration with Phase 10 Validation
+
+**Objective**: Extend the Phase 10 validation suite to verify this receiver's metrics.
+
+**What to do**:
+
+- Update `docker/telemetry/workload/validate_telemetry.py`:
+  - Add assertions for all `xrpl_*` metrics produced by the receiver
+  - Verify metric labels have expected values
+  - Verify alerting rules fire correctly (inject a "bad" state and check alert)
+
+- Update `docker/telemetry/docker-compose.workload.yaml`:
+  - Add the custom OTel Collector build with the rippled receiver
+  - Configure the receiver to poll one of the test nodes
+
+**Key files**:
+
+- Update: `docker/telemetry/workload/validate_telemetry.py`
+- Update: `docker/telemetry/docker-compose.workload.yaml`
+- Update: `docker/telemetry/workload/expected_metrics.json`
+
+---
+
+## Task 11.11: Documentation
+
+**Objective**: Document the receiver, its metrics, deployment, and alerting.
+
+**What to do**:
+
+- Create `docker/telemetry/otel-rippled-receiver/README.md`:
+  - Architecture overview (how the receiver fits into the OTel Collector)
+  - Configuration reference (all config options with defaults)
+  - Metric reference table (all exported metrics with types and labels)
+  - Deployment guide (building custom collector binary, docker-compose integration)
+
+- Update `OpenTelemetryPlan/09-data-collection-reference.md`:
+  - Add "Third-Party Metrics (OTel Collector Receiver)" section
+  - Add new Grafana dashboard reference (4 dashboards)
+  - Add alerting rules reference
+
+- Update `docs/telemetry-runbook.md`:
+  - Add "Third-Party Metrics Receiver" troubleshooting section
+  - Add alerting playbook (what to do for each Tier 1/Tier 2 alert)
+
+---
+
+## Exit Criteria
+
+- [ ] Custom OTel Collector receiver builds and starts without errors
+- [ ] All `xrpl_*` metrics from server_info, get_counts, peers, validators, fee appear in Prometheus
+- [ ] Metrics update at configured poll interval (default 30s)
+- [ ] 4 new Grafana dashboards operational with data
+- [ ] Prometheus alerting rules fire correctly for simulated failure conditions
+- [ ] DEX/AMM collector works when configured (optional — not required for base exit criteria)
+- [ ] Phase 10 validation suite passes with receiver metrics included
+- [ ] Receiver handles rippled restart/unavailability gracefully (no crash, logs warning, retries)
+- [ ] Documentation complete: receiver README, metric reference, alerting playbook
+- [ ] Go receiver has unit tests with >80% coverage
diff --git a/OpenTelemetryPlan/Phase9_taskList.md b/OpenTelemetryPlan/Phase9_taskList.md
new file mode 100644
index 0000000000..1b383592f9
--- /dev/null
+++ b/OpenTelemetryPlan/Phase9_taskList.md
@@ -0,0 +1,312 @@
+# Phase 9: Internal Metric Instrumentation Gap Fill — Task List
+
+> **Status**: Future Enhancement
+>
+> **Goal**: Instrument rippled to emit ~50 metrics that exist in `get_counts`/`server_info`/TxQ/PerfLog but currently lack time-series export via the OTel or beast::insight pipelines.
+>
+> **Scope**: Hybrid approach — extend `beast::insight` for metrics near existing registrations (NodeStore), and use OTel Metrics SDK instruments for new categories: `ObservableGauge` callbacks for cache, TxQ, CountedObjects, and load factors; `Counter`/`Histogram` instruments for PerfLog.
+>
+> **Branch**: `pratik/otel-phase9-metric-gap-fill` (from `pratik/otel-phase8-log-correlation`)
+>
+> **Depends on**: Phase 7 (native OTel metrics pipeline) and Phase 8 (log-trace correlation)
+
+### Related Plan Documents
+
+| Document                                                             | Relevance                                                      |
+| -------------------------------------------------------------------- | -------------------------------------------------------------- |
+| [06-implementation-phases.md](./06-implementation-phases.md)         | Phase 9 plan: motivation, architecture, exit criteria (§6.8.2) |
+| [09-data-collection-reference.md](./09-data-collection-reference.md) | Current metric inventory + future metrics section              |
+| [Phase7_taskList.md](./Phase7_taskList.md)                           | Prerequisite — OTel Metrics SDK and `OTelCollector` class      |
+| [Phase8_taskList.md](./Phase8_taskList.md)                           | Prerequisite — log-trace correlation                           |
+
+### Third-Party Consumer Context
+
+These metrics serve multiple external consumer categories identified during research:
+
+| Consumer Category         | Key Metrics They Need                                           |
+| ------------------------- | --------------------------------------------------------------- |
+| **Exchanges**             | Fee escalation levels, TxQ depth, settlement latency            |
+| **Payment Processors**    | Load factors, io_latency, transaction throughput                |
+| **Analytics Providers**   | NodeStore I/O, cache hit rates, counted objects                 |
+| **Validators/Operators**  | Per-job execution times, PerfLog RPC counters, consensus timing |
+| **Academic Researchers**  | Consensus performance time-series, fee market dynamics          |
+| **Institutional Custody** | Server health scores, reserve calculations, node availability   |
+
+---
+
+## Task 9.1: NodeStore I/O Metrics
+
+**Objective**: Export node store read/write performance as time-series metrics.
+
+**What to do**:
+
+- In `src/libxrpl/nodestore/Database.cpp`, extend existing `beast::insight` registrations to add:
+  - Gauge: `node_reads_total` (cumulative read operations)
+  - Gauge: `node_reads_hit` (cache-served reads)
+  - Gauge: `node_writes` (cumulative write operations)
+  - Gauge: `node_written_bytes` (cumulative bytes written)
+  - Gauge: `node_read_bytes` (cumulative bytes read)
+  - Gauge: `node_reads_duration_us` (cumulative read time in microseconds)
+  - Gauge: `write_load` (current write load score)
+  - Gauge: `read_queue` (items in read queue)
+
+- These values are already computed in `Database::getCountsJson()` (line ~236). Wire the same counters to `beast::insight` hooks.
+
+**Key modified files**:
+
+- `src/libxrpl/nodestore/Database.cpp`
+- `src/libxrpl/nodestore/Database.h` (add insight members)
+
+**Derived Prometheus metrics**: `rippled_nodestore_reads_total`, `rippled_nodestore_reads_hit`, `rippled_nodestore_write_load`, etc.
+
+**Grafana dashboard**: Add "NodeStore I/O" panel group to _Node Health_ dashboard.
+
+---
+
+## Task 9.2: Cache Hit Rate Metrics
+
+**Objective**: Export SHAMap and ledger cache performance as time-series gauges.
+
+**What to do**:
+
+- Register OTel `ObservableGauge` callbacks (via Phase 7's `OTelCollector`) for:
+  - `SLE_hit_rate` — SLE cache hit rate (0.0–1.0)
+  - `ledger_hit_rate` — Ledger object cache hit rate
+  - `AL_hit_rate` — AcceptedLedger cache hit rate
+  - `treenode_cache_size` — SHAMap TreeNode cache size (entries)
+  - `treenode_track_size` — Tracked tree nodes
+  - `fullbelow_size` — FullBelow cache size
+
+- The callback should read from the same sources as `GetCounts.cpp` handler (line ~43).
+
+- Create a centralized `MetricsRegistry` class that holds all OTel async gauge registrations, polled at 10-second intervals by the `PeriodicMetricReader`.
+
+**Key modified files**:
+
+- New: `src/xrpld/telemetry/MetricsRegistry.h` / `.cpp`
+- `src/xrpld/rpc/handlers/GetCounts.cpp` (extract shared access methods)
+- `src/xrpld/app/main/Application.cpp` (register MetricsRegistry at startup)
+
+**Derived Prometheus metrics**: `rippled_cache_SLE_hit_rate`, `rippled_cache_ledger_hit_rate`, `rippled_cache_treenode_size`, etc.
+
+---
+
+## Task 9.3: Transaction Queue (TxQ) Metrics
+
+**Objective**: Export TxQ depth, capacity, and fee escalation levels as time-series.
+
+**What to do**:
+
+- Register OTel `ObservableGauge` callbacks for TxQ state (from `TxQ.h` line ~143):
+  - `txq_count` — Current transactions in queue
+  - `txq_max_size` — Maximum queue capacity
+  - `txq_in_ledger` — Transactions in current open ledger
+  - `txq_per_ledger` — Expected transactions per ledger
+  - `txq_reference_fee_level` — Reference fee level
+  - `txq_min_processing_fee_level` — Minimum fee to get processed
+  - `txq_med_fee_level` — Median fee level in queue
+  - `txq_open_ledger_fee_level` — Open ledger fee escalation level
+
+- Add to the `MetricsRegistry` (Task 9.2).
+
+**Key modified files**:
+
+- `src/xrpld/telemetry/MetricsRegistry.cpp` (add TxQ callbacks)
+- `src/xrpld/app/tx/detail/TxQ.h` (expose metrics accessor if needed)
+
+**Derived Prometheus metrics**: `rippled_txq_count`, `rippled_txq_max_size`, `rippled_txq_open_ledger_fee_level`, etc.
+
+**Grafana dashboard**: New _Fee Market & TxQ_ dashboard (`rippled-fee-market`).
+
+---
+
+## Task 9.4: PerfLog Per-RPC Method Metrics
+
+**Objective**: Export per-RPC-method call counts and latency as OTel metrics.
+
+**What to do**:
+
+- Register OTel instruments for PerfLog RPC counters (from `PerfLogImp.cpp` line ~63):
+  - Counter: `rpc_method_started_total{method="<name>"}` — calls started
+  - Counter: `rpc_method_finished_total{method="<name>"}` — calls completed
+  - Counter: `rpc_method_errored_total{method="<name>"}` — calls errored
+  - Histogram: `rpc_method_duration_us{method="<name>"}` — execution time distribution
+
+- Use OTel `Counter<int64_t>` and `Histogram<double>` instruments with `method` attribute label.
+
+- Hook into the existing PerfLog callback mechanism rather than adding new instrumentation points.
+
+**Key modified files**:
+
+- `src/xrpld/perflog/detail/PerfLogImp.cpp` (add OTel instrument updates alongside existing JSON counters)
+- `src/xrpld/telemetry/MetricsRegistry.cpp` (register instruments)
+
+**Derived Prometheus metrics**: `rippled_rpc_method_started_total{method="server_info"}`, `rippled_rpc_method_duration_us_bucket{method="ledger"}`, etc.
+
+**Grafana dashboard**: Add "Per-Method RPC Breakdown" panel group to _RPC Performance_ dashboard.
+
+---
+
+## Task 9.5: PerfLog Per-Job-Type Metrics
+
+**Objective**: Export per-job-type queue and execution metrics.
+
+**What to do**:
+
+- Register OTel instruments for PerfLog job counters:
+  - Counter: `job_queued_total{job_type="<name>"}` — jobs queued
+  - Counter: `job_started_total{job_type="<name>"}` — jobs started
+  - Counter: `job_finished_total{job_type="<name>"}` — jobs completed
+  - Histogram: `job_queued_duration_us{job_type="<name>"}` — time spent waiting in queue
+  - Histogram: `job_running_duration_us{job_type="<name>"}` — execution time distribution
+
+- Hook into PerfLog's existing job tracking alongside Task 9.4.
+
+**Key modified files**:
+
+- `src/xrpld/perflog/detail/PerfLogImp.cpp`
+- `src/xrpld/telemetry/MetricsRegistry.cpp`
+
+**Derived Prometheus metrics**: `rippled_job_queued_total{job_type="ledgerData"}`, `rippled_job_running_duration_us_bucket{job_type="transaction"}`, etc.
+
+**Grafana dashboard**: New _Job Queue Analysis_ dashboard (`rippled-job-queue`).
+
+---
+
+## Task 9.6: Counted Object Instance Metrics
+
+**Objective**: Export live instance counts for key internal object types.
+
+**What to do**:
+
+- Register OTel `ObservableGauge` callbacks for `CountedObject<T>` instance counts:
+  - `object_count{type="Transaction"}` — live Transaction objects
+  - `object_count{type="Ledger"}` — live Ledger objects
+  - `object_count{type="NodeObject"}` — live NodeObject instances
+  - `object_count{type="STTx"}` — serialized transaction objects
+  - `object_count{type="STLedgerEntry"}` — serialized ledger entries
+  - `object_count{type="InboundLedger"}` — ledgers being fetched
+  - `object_count{type="Pathfinder"}` — active pathfinding computations
+  - `object_count{type="PathRequest"}` — active path requests
+  - `object_count{type="HashRouterEntry"}` — hash router entries
+
+- The `CountedObject` template already tracks these via atomic counters. The callback just reads the current counts.
+
+**Key modified files**:
+
+- `src/xrpld/telemetry/MetricsRegistry.cpp` (add counted object callbacks)
+- `include/xrpl/basics/CountedObject.h` (may need static accessor for iteration)
+
+**Derived Prometheus metrics**: `rippled_object_count{type="Transaction"}`, `rippled_object_count{type="NodeObject"}`, etc.
+
+**Grafana dashboard**: Add "Object Instance Counts" panel to _Node Health_ dashboard.
+
+---
+
+## Task 9.7: Fee Escalation & Load Factor Metrics
+
+**Objective**: Export the full load factor breakdown as time-series.
+
+**What to do**:
+
+- Register OTel `ObservableGauge` callbacks for load factors (from `NetworkOPs.cpp` line ~2694):
+  - `load_factor` — combined transaction cost multiplier
+  - `load_factor_server` — server + cluster + network contribution
+  - `load_factor_local` — local server load only
+  - `load_factor_net` — network-wide load estimate
+  - `load_factor_cluster` — cluster peer load
+  - `load_factor_fee_escalation` — open ledger fee escalation
+  - `load_factor_fee_queue` — queue entry fee level
+
+- These overlap with some existing StatsD metrics but provide finer granularity (individual factor breakdown vs. combined value).
+
+**Key modified files**:
+
+- `src/xrpld/telemetry/MetricsRegistry.cpp`
+- `src/xrpld/app/misc/NetworkOPs.cpp` (expose load factor accessors if needed)
+
+**Derived Prometheus metrics**: `rippled_load_factor`, `rippled_load_factor_fee_escalation`, etc.
+
+**Grafana dashboard**: Add "Load Factor Breakdown" panel to _Fee Market & TxQ_ dashboard.
+
+---
+
+## Task 9.8: New Grafana Dashboards
+
+**Objective**: Create Grafana dashboards for the new metric categories.
+
+**What to do**:
+
+- Create 2 new dashboards:
+  1. **Fee Market & TxQ** (`rippled-fee-market`) — TxQ depth/capacity, fee levels, load factor breakdown, fee escalation timeline
+  2. **Job Queue Analysis** (`rippled-job-queue`) — Per-job-type rates, queue wait times, execution times, job queue depth
+
+- Update 2 existing dashboards:
+  1. **Node Health** (`rippled-statsd-node-health`) — Add NodeStore I/O panels, cache hit rate panels, object instance counts
+  2. **RPC Performance** (`rippled-rpc-perf`) — Add per-method RPC breakdown panels
+
+**Key modified files**:
+
+- New: `docker/telemetry/grafana/dashboards/rippled-fee-market.json`
+- New: `docker/telemetry/grafana/dashboards/rippled-job-queue.json`
+- `docker/telemetry/grafana/dashboards/system-node-health.json`
+- `docker/telemetry/grafana/dashboards/rippled-rpc-perf.json`
+
+---
+
+## Task 9.9: Update Documentation
+
+**Objective**: Update telemetry reference docs with all new metrics.
+
+**What to do**:
+
+- Update `OpenTelemetryPlan/09-data-collection-reference.md`:
+  - Add new section for OTel SDK-exported metrics (NodeStore, cache, TxQ, PerfLog, CountedObjects, load factors)
+  - Update Grafana dashboard reference table (add 2 new dashboards)
+  - Add Prometheus query examples for new metrics
+
+- Update `docs/telemetry-runbook.md`:
+  - Add alerting rules for new metrics (NodeStore write_load, TxQ capacity, cache hit rate degradation)
+  - Add troubleshooting entries for new metric categories
+
+**Key modified files**:
+
+- `OpenTelemetryPlan/09-data-collection-reference.md`
+- `docs/telemetry-runbook.md`
+
+---
+
+## Task 9.10: Integration Tests
+
+**Objective**: Verify all new metrics appear in Prometheus after a test workload.
+
+**What to do**:
+
+- Extend the existing telemetry integration test:
+  - Start rippled with `[telemetry] enabled=1` and `[insight] server=otel`
+  - Submit a batch of RPC calls and transactions
+  - Query Prometheus for each new metric family
+  - Assert non-zero values for: NodeStore reads, cache hit rates, TxQ count, PerfLog RPC counters, object counts, load factors
+
+- Add unit tests for the `MetricsRegistry` class:
+  - Verify callback registration and deregistration
+  - Verify metric values match `get_counts` JSON output
+  - Verify graceful behavior when telemetry is disabled
+
+**Key modified files**:
+
+- `src/tests/libxrpl/telemetry/MetricsRegistry.cpp` (new)
+- `docker/telemetry/integration-test.sh` (existing integration test script; extend assertions)
+
+---
+
+## Exit Criteria
+
+- [ ] All ~50 new metrics visible in Prometheus via OTLP pipeline
+- [ ] `MetricsRegistry` class registers/deregisters cleanly with OTel SDK
+- [ ] Async gauge callbacks execute at 10s intervals without performance impact
+- [ ] 2 new Grafana dashboards operational (Fee Market, Job Queue)
+- [ ] 2 existing dashboards updated with new panel groups
+- [ ] Integration test validates all new metric families are non-zero
+- [ ] No performance regression (< 0.5% CPU overhead from new callbacks)
+- [ ] Documentation updated with full new metric inventory
diff --git a/docker/telemetry/grafana/dashboards/rippled-fee-market.json b/docker/telemetry/grafana/dashboards/rippled-fee-market.json
new file mode 100644
index 0000000000..85fb1aa102
--- /dev/null
+++ b/docker/telemetry/grafana/dashboards/rippled-fee-market.json
@@ -0,0 +1,343 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "description": "Fee market dynamics: TxQ depth/capacity, fee escalation levels, and load factor breakdown. Sourced from OTel MetricsRegistry observable gauges (Phase 9).",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "title": "Transaction Queue Depth",
+      "description": "Current number of transactions waiting in the queue vs. maximum capacity. Sourced from MetricsRegistry txq_metrics observable gauge with metric=txq_count and metric=txq_max_size.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_count\"}",
+          "legendFormat": "Queue Depth [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_max_size\"}",
+          "legendFormat": "Max Capacity [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Transactions",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Transactions Per Ledger",
+      "description": "Transactions in the current open ledger vs. expected per-ledger count. Sourced from txq_metrics with metric=txq_in_ledger and metric=txq_per_ledger.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_in_ledger\"}",
+          "legendFormat": "In Ledger [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_per_ledger\"}",
+          "legendFormat": "Expected Per Ledger [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Transactions",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Fee Escalation Levels",
+      "description": "Fee levels that control transaction queue admission. Reference fee level is the baseline; open ledger fee level triggers escalation. Sourced from txq_metrics observable gauge.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 8
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_reference_fee_level\"}",
+          "legendFormat": "Reference Fee Level [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_min_processing_fee_level\"}",
+          "legendFormat": "Min Processing Fee Level [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_med_fee_level\"}",
+          "legendFormat": "Median Fee Level [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_open_ledger_fee_level\"}",
+          "legendFormat": "Open Ledger Fee Level [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Fee Level",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 5,
+            "scaleDistribution": {
+              "type": "log",
+              "log": 2
+            }
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Load Factor Breakdown",
+      "description": "Decomposed load factor components: server (max of local, net, cluster), fee escalation, fee queue, and combined. Values are unitless multipliers where 1.0 = no load. Sourced from load_factor_metrics observable gauge.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor\"}",
+          "legendFormat": "Combined Load Factor [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_server\"}",
+          "legendFormat": "Server [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_fee_escalation\"}",
+          "legendFormat": "Fee Escalation [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_fee_queue\"}",
+          "legendFormat": "Fee Queue [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Multiplier",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 5
+          },
+          "color": {
+            "mode": "palette-classic"
+          },
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 2
+              },
+              {
+                "color": "red",
+                "value": 10
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Load Factor Components",
+      "description": "Individual load factor contributors: local server load, network load, and cluster load. These values differ from 1.0 only under load conditions. Sourced from load_factor_metrics observable gauge.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_local\"}",
+          "legendFormat": "Local [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_net\"}",
+          "legendFormat": "Network [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_cluster\"}",
+          "legendFormat": "Cluster [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Multiplier",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 5
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["rippled", "otel", "fee-market"],
+  "templating": {
+    "list": [
+      {
+        "name": "node",
+        "label": "Node",
+        "description": "Filter by rippled node (service.instance.id)",
+        "type": "query",
+        "query": "label_values(exported_instance)",
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "includeAll": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "multi": true,
+        "refresh": 2,
+        "sort": 1
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Fee Market & TxQ",
+  "uid": "rippled-fee-market",
+  "version": 1
+}
diff --git a/docker/telemetry/grafana/dashboards/rippled-job-queue.json b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
new file mode 100644
index 0000000000..e29b96f750
--- /dev/null
+++ b/docker/telemetry/grafana/dashboards/rippled-job-queue.json
@@ -0,0 +1,395 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "description": "Job queue analysis: per-job-type throughput rates, queue wait times, and execution times. Sourced from OTel MetricsRegistry synchronous counters and histograms (Phase 9).",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "title": "Job Throughput Rate (Per Second)",
+      "description": "Rate of jobs queued, started, and finished across all job types. Computed as rate() over the OTel counter values. High queue rates with low finish rates indicate backlog.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (exported_instance) (rate(rippled_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
+          "legendFormat": "Queued/s [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (exported_instance) (rate(rippled_job_started_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
+          "legendFormat": "Started/s [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (exported_instance) (rate(rippled_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
+          "legendFormat": "Finished/s [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "axisLabel": "Operations / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Per-Job-Type Queued Rate",
+      "description": "Rate of jobs queued broken down by job_type label. Identifies which job types contribute most to queue activity.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, rate(rippled_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
+          "legendFormat": "{{job_type}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Operations / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Per-Job-Type Finish Rate",
+      "description": "Rate of jobs completing broken down by job_type. Compare with queued rate to identify backlog per type.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 8
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, rate(rippled_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
+          "legendFormat": "{{job_type}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Operations / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Job Queue Wait Time (P50, P95, P99)",
+      "description": "Histogram quantiles for the time jobs spend waiting in the queue before execution starts. High values indicate thread pool saturation.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
+          "legendFormat": "P50 [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
+          "legendFormat": "P95 [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
+          "legendFormat": "P99 [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "us",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 5,
+            "axisLabel": "Duration (μs)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Job Execution Time (P50, P95, P99)",
+      "description": "Histogram quantiles for actual job execution time. High values indicate expensive operations or resource contention.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
+          "legendFormat": "P50 [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
+          "legendFormat": "P95 [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
+          "legendFormat": "P99 [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "us",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 5,
+            "axisLabel": "Duration (μs)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Per-Job-Type Execution Time (P95)",
+      "description": "95th percentile execution time broken down by job type. Identifies the slowest job types.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 24
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, histogram_quantile(0.95, sum by (le, job_type, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))))",
+          "legendFormat": "{{job_type}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "us",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Duration (μs)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["rippled", "otel", "job-queue"],
+  "templating": {
+    "list": [
+      {
+        "name": "node",
+        "label": "Node",
+        "description": "Filter by rippled node (service.instance.id)",
+        "type": "query",
+        "query": "label_values(exported_instance)",
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "includeAll": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "multi": true,
+        "refresh": 2,
+        "sort": 1
+      },
+      {
+        "name": "job_type",
+        "label": "Job Type",
+        "description": "Filter by job type",
+        "type": "query",
+        "query": "label_values(rippled_job_queued_total, job_type)",
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "includeAll": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "multi": true,
+        "refresh": 2,
+        "sort": 1
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Job Queue Analysis",
+  "uid": "rippled-job-queue",
+  "version": 1
+}
diff --git a/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
new file mode 100644
index 0000000000..577ff69783
--- /dev/null
+++ b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json
@@ -0,0 +1,404 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "description": "Per-RPC-method performance: call rates, error rates, and latency distributions. Sourced from OTel MetricsRegistry synchronous counters and histograms (Phase 9).",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "title": "RPC Call Rate (All Methods)",
+      "description": "Aggregate rate of RPC calls started, finished, and errored across all methods. Computed as rate() over OTel counters.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (exported_instance) (rate(rippled_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
+          "legendFormat": "Started/s [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (exported_instance) (rate(rippled_rpc_method_finished_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
+          "legendFormat": "Finished/s [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (exported_instance) (rate(rippled_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
+          "legendFormat": "Errored/s [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "axisLabel": "Operations / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Per-Method Call Rate (Top 10)",
+      "description": "Per-method RPC call rate, showing the 10 most active methods. Useful for identifying hot paths.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, rate(rippled_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
+          "legendFormat": "{{method}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Operations / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Per-Method Error Rate (Top 10)",
+      "description": "Per-method RPC error rate. Non-zero values warrant investigation. Common culprits: invalid parameters, resource exhaustion.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 8
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, rate(rippled_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
+          "legendFormat": "{{method}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Operations / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "RPC Latency (P50, P95, P99) - All Methods",
+      "description": "Histogram quantiles for RPC execution time across all methods. Sourced from rpc_method_duration_us histogram.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))",
+          "legendFormat": "P50 [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))",
+          "legendFormat": "P95 [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))",
+          "legendFormat": "P99 [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "us",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 5,
+            "axisLabel": "Duration (μs)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Per-Method Latency P95 (Top 10 Slowest)",
+      "description": "95th percentile execution time per method. Identifies the slowest RPC endpoints.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, histogram_quantile(0.95, sum by (le, method, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m]))))",
+          "legendFormat": "{{method}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "us",
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Duration (μs)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "RPC Error Ratio by Method",
+      "description": "Error ratio (errors / total started) per method. Values above 0.05 (5%) warrant investigation.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 24
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(10, rate(rippled_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) / (rate(rippled_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) > 0))",
+          "legendFormat": "{{method}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percentunit",
+          "min": 0,
+          "max": 1,
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5,
+            "axisLabel": "Error Ratio",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          },
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 0.05
+              },
+              {
+                "color": "red",
+                "value": 0.25
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["rippled", "otel", "rpc"],
+  "templating": {
+    "list": [
+      {
+        "name": "node",
+        "label": "Node",
+        "description": "Filter by rippled node (service.instance.id)",
+        "type": "query",
+        "query": "label_values(exported_instance)",
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "includeAll": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "multi": true,
+        "refresh": 2,
+        "sort": 1
+      },
+      {
+        "name": "method",
+        "label": "RPC Method",
+        "description": "Filter by RPC method",
+        "type": "query",
+        "query": "label_values(rippled_rpc_method_started_total, method)",
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "includeAll": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "multi": true,
+        "refresh": 2,
+        "sort": 1
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "RPC Performance (OTel)",
+  "uid": "rippled-rpc-perf",
+  "version": 1
+}
diff --git a/docker/telemetry/grafana/dashboards/system-node-health.json b/docker/telemetry/grafana/dashboards/system-node-health.json
index 456c62b2e1..546a5f12a2 100644
--- a/docker/telemetry/grafana/dashboards/system-node-health.json
+++ b/docker/telemetry/grafana/dashboards/system-node-health.json
@@ -52,7 +52,8 @@
                 "value": 20
               }
             ]
-          }
+          },
+          "custom": {}
         },
         "overrides": []
       }
@@ -100,7 +101,8 @@
                 "value": 20
               }
             ]
-          }
+          },
+          "custom": {}
         },
         "overrides": []
       }
@@ -351,7 +353,8 @@
       ],
       "fieldConfig": {
         "defaults": {
-          "unit": "ops"
+          "unit": "ops",
+          "custom": {}
         },
         "overrides": []
       }
@@ -395,6 +398,324 @@
                 "value": 0.01
               }
             ]
+          },
+          "custom": {}
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "--- OTel: NodeStore I/O ---",
+      "type": "row",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 32
+      },
+      "collapsed": false,
+      "panels": []
+    },
+    {
+      "title": "NodeStore Read/Write Totals",
+      "description": "Cumulative NodeStore read and write operation counts. Sourced from MetricsRegistry nodestore_state observable gauge with metric=node_reads_total, node_writes, node_reads_hit.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 33
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_total\"}",
+          "legendFormat": "Reads Total [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_hit\"}",
+          "legendFormat": "Reads Hit (cache) [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"node_writes\"}",
+          "legendFormat": "Writes Total [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Operations",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "NodeStore Write Load & Read Queue",
+      "description": "Instantaneous write load score and read queue depth. High write load indicates backend pressure. High read queue indicates prefetch thread saturation.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 33
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"write_load\"}",
+          "legendFormat": "Write Load [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"read_queue\"}",
+          "legendFormat": "Read Queue [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Count",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          },
+          "thresholds": {
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 100
+              },
+              {
+                "color": "red",
+                "value": 1000
+              }
+            ]
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "--- OTel: Cache Hit Rates ---",
+      "type": "row",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 41
+      },
+      "collapsed": false,
+      "panels": []
+    },
+    {
+      "title": "Cache Hit Rates",
+      "description": "Hit rates for SLE cache, Ledger cache, and AcceptedLedger cache. Values from 0.0 to 1.0. Low values indicate cache thrashing. Sourced from MetricsRegistry cache_metrics observable gauge.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 42
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"SLE_hit_rate\"}",
+          "legendFormat": "SLE Hit Rate [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"ledger_hit_rate\"}",
+          "legendFormat": "Ledger Hit Rate [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"AL_hit_rate\"}",
+          "legendFormat": "AcceptedLedger Hit Rate [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percentunit",
+          "min": 0,
+          "max": 1,
+          "custom": {
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10,
+            "axisLabel": "Hit Rate",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Cache Sizes",
+      "description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 42
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_cache_size\"}",
+          "legendFormat": "TreeNode Cache [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_track_size\"}",
+          "legendFormat": "TreeNode Track [{{exported_instance}}]"
+        },
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}",
+          "legendFormat": "FullBelow [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Entries",
+            "drawStyle": "line",
+            "lineWidth": 2,
+            "fillOpacity": 10
+          },
+          "color": {
+            "mode": "palette-classic"
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "--- OTel: Object Instance Counts ---",
+      "type": "row",
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 50
+      },
+      "collapsed": false,
+      "panels": []
+    },
+    {
+      "title": "Object Instance Counts",
+      "description": "Live instance counts for key internal object types tracked by CountedObject<T>. Sourced from MetricsRegistry object_count observable gauge. High counts may indicate memory pressure or object leaks.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 51
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["last", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "topk(15, rippled_object_count{exported_instance=~\"$node\", type=~\"$type\"})",
+          "legendFormat": "{{type}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "none",
+          "custom": {
+            "axisLabel": "Instances",
+            "drawStyle": "line",
+            "lineWidth": 1,
+            "fillOpacity": 5
+          },
+          "color": {
+            "mode": "palette-classic"
           }
         },
         "overrides": []
@@ -402,7 +723,7 @@
     }
   ],
   "schemaVersion": 39,
-  "tags": ["rippled", "statsd", "node-health", "telemetry"],
+  "tags": ["rippled", "statsd", "otel", "node-health", "telemetry"],
   "templating": {
     "list": [
       {
@@ -424,6 +745,26 @@
         "multi": true,
         "refresh": 2,
         "sort": 1
+      },
+      {
+        "name": "type",
+        "label": "Object Type",
+        "description": "Filter by internal object type (CountedObject<T> class name)",
+        "type": "query",
+        "query": "label_values(rippled_object_count, type)",
+        "datasource": {
+          "type": "prometheus",
+          "uid": "prometheus"
+        },
+        "includeAll": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "multi": true,
+        "refresh": 2,
+        "sort": 1
       }
     ]
   },
diff --git a/docker/telemetry/integration-test.sh b/docker/telemetry/integration-test.sh
index 6ebe0b2ecb..0938a02984 100755
--- a/docker/telemetry/integration-test.sh
+++ b/docker/telemetry/integration-test.sh
@@ -355,6 +355,7 @@ trace_transactions=1
 trace_consensus=1
 trace_peer=1
 trace_ledger=1
+metrics_endpoint=http://localhost:4318/v1/metrics
 
 [insight]
 server=otel
@@ -639,6 +640,53 @@ else
     fail "StatsD port 8125 appears to be listening (should not be needed)"
 fi
 
+# ---------------------------------------------------------------------------
+# Step 10c: Verify Phase 9 OTel SDK Metrics
+# ---------------------------------------------------------------------------
+log ""
+log "--- Phase 9: OTel SDK Metrics (MetricsRegistry) ---"
+log "Waiting 15s for OTel metric export + Prometheus scrape..."
+sleep 15
+
+check_otel_metric() {  # ok if Prometheus returns >=1 series for PromQL selector $1, else fail
+    local metric_name="$1"
+    local result  # -G/--data-urlencode below: selectors carry {} = " that must be percent-encoded
+    result=$(curl -sfG --data-urlencode "query=$metric_name" "$PROM/api/v1/query" \
+        | jq '.data.result | length' 2>/dev/null || echo 0)
+    if [ "${result:-0}" -gt 0 ]; then  # empty output (curl failed) is treated as 0 series
+        ok "OTel: $metric_name ($result series)"
+    else
+        fail "OTel: $metric_name (0 series)"
+    fi
+}
+
+# Task 9.1: NodeStore I/O
+check_otel_metric 'rippled_nodestore_state{metric="node_reads_total"}'
+check_otel_metric 'rippled_nodestore_state{metric="write_load"}'
+
+# Task 9.2: Cache hit rates
+check_otel_metric 'rippled_cache_metrics{metric="SLE_hit_rate"}'
+check_otel_metric 'rippled_cache_metrics{metric="treenode_cache_size"}'
+
+# Task 9.3: TxQ metrics
+check_otel_metric 'rippled_txq_metrics{metric="txq_count"}'
+check_otel_metric 'rippled_txq_metrics{metric="txq_reference_fee_level"}'
+
+# Task 9.4: Per-RPC metrics
+check_otel_metric "rippled_rpc_method_started_total"
+check_otel_metric "rippled_rpc_method_finished_total"
+
+# Task 9.5: Per-job metrics
+check_otel_metric "rippled_job_queued_total"
+check_otel_metric "rippled_job_finished_total"
+
+# Task 9.6: Counted object instances
+check_otel_metric "rippled_object_count"
+
+# Task 9.7: Load factor breakdown
+check_otel_metric 'rippled_load_factor_metrics{metric="load_factor"}'
+check_otel_metric 'rippled_load_factor_metrics{metric="load_factor_server"}'
+
 # ---------------------------------------------------------------------------
 # Step 11: Summary
 # ---------------------------------------------------------------------------
diff --git a/include/xrpl/core/ServiceRegistry.h b/include/xrpl/core/ServiceRegistry.h
index aa0d9c495c..ff250453b8 100644
--- a/include/xrpl/core/ServiceRegistry.h
+++ b/include/xrpl/core/ServiceRegistry.h
@@ -20,7 +20,8 @@ class PerfLog;
 }
 namespace telemetry {
 class Telemetry;
-}
+class MetricsRegistry;
+}  // namespace telemetry
 
 // This is temporary until we migrate all code to use ServiceRegistry.
 class Application;
@@ -224,6 +225,12 @@ public:
     virtual telemetry::Telemetry&
     getTelemetry() = 0;
 
+    /** Return the MetricsRegistry, or nullptr if none has been created.
+        A non-null registry may still be disabled; PerfLog and other hot
+        paths null-check the result before recording OTel metrics. */
+    virtual telemetry::MetricsRegistry*
+    getMetricsRegistry() = 0;
+
     // Configuration and state
     virtual bool
     isStopping() const = 0;
diff --git a/src/tests/libxrpl/CMakeLists.txt b/src/tests/libxrpl/CMakeLists.txt
index 86e00614e1..2c2bd64acb 100644
--- a/src/tests/libxrpl/CMakeLists.txt
+++ b/src/tests/libxrpl/CMakeLists.txt
@@ -62,5 +62,14 @@ if(telemetry)
         xrpl.test.telemetry
         PRIVATE opentelemetry-cpp::opentelemetry-cpp
     )
+else()
+    # MetricsRegistry lives in xrpld; compile its .cpp directly into the test
+    # target so the no-op path can be tested without linking all of xrpld.
+    # When telemetry=ON, XRPL_ENABLE_TELEMETRY is globally defined and the
+    # .cpp pulls in xrpld symbols we cannot satisfy here.
+    target_sources(
+        xrpl.test.telemetry
+        PRIVATE ${CMAKE_SOURCE_DIR}/src/xrpld/telemetry/MetricsRegistry.cpp
+    )
 endif()
 add_dependencies(xrpl.tests xrpl.test.telemetry)
diff --git a/src/tests/libxrpl/telemetry/MetricsRegistry.cpp b/src/tests/libxrpl/telemetry/MetricsRegistry.cpp
new file mode 100644
index 0000000000..2e11d37819
--- /dev/null
+++ b/src/tests/libxrpl/telemetry/MetricsRegistry.cpp
@@ -0,0 +1,346 @@
+/** GTest unit tests for MetricsRegistry (no-op / telemetry-disabled path).
+ *
+ *  Tests cover:
+ *  - Construction with telemetry disabled (no-op behavior).
+ *  - start()/stop() lifecycle when disabled.
+ *  - Synchronous instrument recording methods do not crash when disabled.
+ *  - Double stop() is safe.
+ *  - Destructor handles cleanup without crash.
+ *
+ *  NOTE: These tests only exercise the no-op path (telemetry disabled).
+ *  When XRPL_ENABLE_TELEMETRY is defined, MetricsRegistry.cpp pulls in
+ *  xrpld symbols that cannot be linked into this standalone test binary,
+ *  so the tests are compiled out.
+ */
+
+// When telemetry is globally enabled, MetricsRegistry.cpp requires xrpld
+// link dependencies we cannot satisfy in a standalone GTest binary.
+#ifndef XRPL_ENABLE_TELEMETRY
+
+#include <xrpld/telemetry/MetricsRegistry.h>
+
+#include <xrpl/core/ServiceRegistry.h>
+
+#include <gtest/gtest.h>
+
+using namespace xrpl;
+
+namespace {
+
+/** Minimal mock ServiceRegistry for MetricsRegistry testing.
+ *
+ *  The tests only pass this object to a MetricsRegistry constructed with
+ *  enabled=false and never call it directly.
+ *
+ *  Accessors that would need a real service throw std::logic_error to catch
+ *  accidental calls; the few remaining queries return inert values.
+ */
+class MockServiceRegistry : public ServiceRegistry
+{
+    [[noreturn]] void
+    throwUnimplemented() const  // shared failure path for every unsupported accessor
+    {
+        throw std::logic_error("MockServiceRegistry: method not implemented");
+    }
+
+public:
+    // ServiceRegistry interface — stubs that should never be called.
+    CollectorManager&
+    getCollectorManager() override
+    {
+        throwUnimplemented();
+    }
+    Family&
+    getNodeFamily() override
+    {
+        throwUnimplemented();
+    }
+    TimeKeeper&
+    timeKeeper() override
+    {
+        throwUnimplemented();
+    }
+    JobQueue&
+    getJobQueue() override
+    {
+        throwUnimplemented();
+    }
+    NodeCache&
+    getTempNodeCache() override
+    {
+        throwUnimplemented();
+    }
+    CachedSLEs&
+    cachedSLEs() override
+    {
+        throwUnimplemented();
+    }
+    NetworkIDService&
+    getNetworkIDService() override
+    {
+        throwUnimplemented();
+    }
+    AmendmentTable&
+    getAmendmentTable() override
+    {
+        throwUnimplemented();
+    }
+    HashRouter&
+    getHashRouter() override
+    {
+        throwUnimplemented();
+    }
+    LoadFeeTrack&
+    getFeeTrack() override
+    {
+        throwUnimplemented();
+    }
+    LoadManager&
+    getLoadManager() override
+    {
+        throwUnimplemented();
+    }
+    RCLValidations&
+    getValidations() override
+    {
+        throwUnimplemented();
+    }
+    ValidatorList&
+    validators() override
+    {
+        throwUnimplemented();
+    }
+    ValidatorSite&
+    validatorSites() override
+    {
+        throwUnimplemented();
+    }
+    ManifestCache&
+    validatorManifests() override
+    {
+        throwUnimplemented();
+    }
+    ManifestCache&
+    publisherManifests() override
+    {
+        throwUnimplemented();
+    }
+    Overlay&
+    overlay() override
+    {
+        throwUnimplemented();
+    }
+    Cluster&
+    cluster() override
+    {
+        throwUnimplemented();
+    }
+    PeerReservationTable&
+    peerReservations() override
+    {
+        throwUnimplemented();
+    }
+    Resource::Manager&
+    getResourceManager() override
+    {
+        throwUnimplemented();
+    }
+    NodeStore::Database&
+    getNodeStore() override
+    {
+        throwUnimplemented();
+    }
+    SHAMapStore&
+    getSHAMapStore() override
+    {
+        throwUnimplemented();
+    }
+    RelationalDatabase&
+    getRelationalDatabase() override
+    {
+        throwUnimplemented();
+    }
+    InboundLedgers&
+    getInboundLedgers() override
+    {
+        throwUnimplemented();
+    }
+    InboundTransactions&
+    getInboundTransactions() override
+    {
+        throwUnimplemented();
+    }
+    TaggedCache<uint256, AcceptedLedger>&
+    getAcceptedLedgerCache() override
+    {
+        throwUnimplemented();
+    }
+    LedgerMaster&
+    getLedgerMaster() override
+    {
+        throwUnimplemented();
+    }
+    LedgerCleaner&
+    getLedgerCleaner() override
+    {
+        throwUnimplemented();
+    }
+    LedgerReplayer&
+    getLedgerReplayer() override
+    {
+        throwUnimplemented();
+    }
+    PendingSaves&
+    pendingSaves() override
+    {
+        throwUnimplemented();
+    }
+    OpenLedger&
+    openLedger() override
+    {
+        throwUnimplemented();
+    }
+    OpenLedger const&
+    openLedger() const override
+    {
+        throwUnimplemented();
+    }
+    NetworkOPs&
+    getOPs() override
+    {
+        throwUnimplemented();
+    }
+    OrderBookDB&
+    getOrderBookDB() override
+    {
+        throwUnimplemented();
+    }
+    TransactionMaster&
+    getMasterTransaction() override
+    {
+        throwUnimplemented();
+    }
+    TxQ&
+    getTxQ() override
+    {
+        throwUnimplemented();
+    }
+    PathRequests&
+    getPathRequests() override
+    {
+        throwUnimplemented();
+    }
+    ServerHandler&
+    getServerHandler() override
+    {
+        throwUnimplemented();
+    }
+    perf::PerfLog&
+    getPerfLog() override
+    {
+        throwUnimplemented();
+    }
+    telemetry::Telemetry&
+    getTelemetry() override
+    {
+        throwUnimplemented();
+    }
+    telemetry::MetricsRegistry*
+    getMetricsRegistry() override
+    {
+        return nullptr;  // the mock owns no registry
+    }
+    bool
+    isStopping() const override
+    {
+        return false;  // the mock always reports "not stopping"
+    }
+    beast::Journal
+    journal(std::string const&) override
+    {
+        return beast::Journal(beast::Journal::getNullSink());  // name ignored; null sink
+    }
+    boost::asio::io_context&
+    getIOContext() override
+    {
+        throwUnimplemented();
+    }
+    Logs&
+    logs() override
+    {
+        throwUnimplemented();
+    }
+    std::optional<uint256> const&
+    trapTxID() const override
+    {
+        static std::optional<uint256> const empty;  // static so the returned reference stays valid
+        return empty;
+    }
+    DatabaseCon&
+    getWalletDB() override
+    {
+        throwUnimplemented();
+    }
+    Application&
+    app() override
+    {
+        throwUnimplemented();
+    }
+};
+
+/// Test fixture that provides a MockServiceRegistry and null Journal.
+class MetricsRegistryTest : public ::testing::Test
+{
+protected:
+    MockServiceRegistry mockApp_;  // passed to each MetricsRegistry under test
+    beast::Journal j_{beast::Journal::getNullSink()};  // journal on the null sink
+};
+
+}  // namespace
+
+TEST_F(MetricsRegistryTest, disabled_construction)
+{
+    // A registry built with enabled=false must report itself as disabled.
+    telemetry::MetricsRegistry registry(false, mockApp_, j_);
+    EXPECT_FALSE(registry.isEnabled());
+}
+
+TEST_F(MetricsRegistryTest, disabled_start_stop)
+{
+    telemetry::MetricsRegistry registry(false, mockApp_, j_);
+
+    // Exercise the start()/stop() lifecycle on a registry built with enabled=false.
+    registry.start("http://localhost:4318/v1/metrics");
+    registry.stop();
+
+    // A second stop() on the same registry must also be safe.
+    registry.stop();
+}
+
+TEST_F(MetricsRegistryTest, disabled_recording_methods)
+{
+    telemetry::MetricsRegistry registry(false, mockApp_, j_);
+    registry.start("http://localhost:4318/v1/metrics");
+
+    // Call each record* entry point once; there are no assertions, so the test passes if none crash.
+    registry.recordRpcStarted("server_info");
+    registry.recordRpcFinished("server_info", 1000);
+    registry.recordRpcErrored("ledger", 500);
+    registry.recordJobQueued("ledgerData");
+    registry.recordJobStarted("ledgerData", 200);
+    registry.recordJobFinished("ledgerData", 3000);
+
+    registry.stop();
+}
+
+TEST_F(MetricsRegistryTest, destructor_calls_stop)
+{
+    {
+        // No explicit stop(): leaving this scope destroys a started registry.
+        telemetry::MetricsRegistry registry(false, mockApp_, j_);
+        registry.start("http://localhost:4318/v1/metrics");
+    }
+    // Reaching this point means destruction completed without crashing.
+}
+
+#endif  // !XRPL_ENABLE_TELEMETRY
diff --git a/src/xrpld/app/main/Application.cpp b/src/xrpld/app/main/Application.cpp
index 4c394de0dc..3d8a59ca85 100644
--- a/src/xrpld/app/main/Application.cpp
+++ b/src/xrpld/app/main/Application.cpp
@@ -29,6 +29,7 @@
 #include <xrpld/overlay/make_Overlay.h>
 #include <xrpld/rpc/detail/PathRequestManager.h>
 #include <xrpld/shamap/NodeFamily.h>
+#include <xrpld/telemetry/MetricsRegistry.h>
 
 #include <xrpl/basics/ByteUtilities.h>
 #include <xrpl/basics/ResolverAsio.h>
@@ -149,6 +150,9 @@ public:
     beast::Journal m_journal;
     std::unique_ptr<perf::PerfLog> perfLog_;
     std::unique_ptr<telemetry::Telemetry> telemetry_;
+    /// OTel metrics registry for gap-fill metrics (counters, histograms,
+    /// observable gauges). Created after telemetry_ during setup().
+    std::unique_ptr<telemetry::MetricsRegistry> metricsRegistry_;
     Application::MutexType m_masterMutex;
 
     // Required by the SHAMapStore
@@ -640,6 +644,12 @@ public:
         return *telemetry_;
     }
 
+    telemetry::MetricsRegistry*
+    getMetricsRegistry() override
+    {
+        return metricsRegistry_.get();  // nullptr until setup() creates the registry
+    }
+
     NodeCache&
     getTempNodeCache() override
     {
@@ -1289,6 +1299,11 @@ ApplicationImp::setup(boost::program_options::variables_map const& cmdline)
     if (!config_->section("telemetry").exists("service_instance_id"))
         telemetry_->setServiceInstanceId(toBase58(TokenType::NodePublic, nodeIdentity_->first));
 
+    // Create the OTel MetricsRegistry for gap-fill metrics (counters,
+    // histograms, observable gauges).  It is started later in start().
+    metricsRegistry_ = std::make_unique<telemetry::MetricsRegistry>(
+        telemetry_->isEnabled(), *this, logs_->journal("MetricsRegistry"));
+
     if (!cluster_->load(config().section(SECTION_CLUSTER_NODES)))
     {
         JLOG(m_journal.fatal()) << "Invalid entry in cluster configuration.";
@@ -1502,6 +1517,24 @@ ApplicationImp::start(bool withTimers)
     ledgerCleaner_->start();
     perfLog_->start();
     telemetry_->start();
+
+    // Start the metrics pipeline after telemetry. The endpoint is read from
+    // [telemetry] metrics_endpoint and defaults to the local /v1/metrics URL.
+    if (metricsRegistry_)
+    {
+        auto const& section = config_->section("telemetry");
+        std::string endpoint = "http://localhost:4318/v1/metrics";
+        set(endpoint, "metrics_endpoint", section);
+
+        // Pass the service_instance_id so the MeterProvider Resource
+        // carries it, giving Prometheus an exported_instance label.
+        std::string instanceId;
+        set(instanceId, "service_instance_id", section);
+        if (instanceId.empty() && nodeIdentity_)
+            instanceId = toBase58(TokenType::NodePublic, nodeIdentity_->first);
+
+        metricsRegistry_->start(endpoint, instanceId);
+    }
 }
 
 void
@@ -1592,6 +1625,10 @@ ApplicationImp::run()
     ledgerCleaner_->stop();
     m_nodeStore->stop();
     perfLog_->stop();
+    // Stop metrics pipeline before telemetry — gauge callbacks reference
+    // Application services that may be shutting down.
+    if (metricsRegistry_)
+        metricsRegistry_->stop();
     // Telemetry must stop last among trace-producing components.
     // serverHandler_, overlay_, and jobQueue_ are already stopped above,
     // so no threads should be calling startSpan() at this point.
diff --git a/src/xrpld/perflog/detail/PerfLogImp.cpp b/src/xrpld/perflog/detail/PerfLogImp.cpp
index 960fdcb3ac..4618a9f381 100644
--- a/src/xrpld/perflog/detail/PerfLogImp.cpp
+++ b/src/xrpld/perflog/detail/PerfLogImp.cpp
@@ -1,9 +1,11 @@
 #include <xrpld/perflog/detail/PerfLogImp.h>
+#include <xrpld/telemetry/MetricsRegistry.h>
 
 #include <xrpl/basics/BasicConfig.h>
 #include <xrpl/beast/core/CurrentThreadName.h>
 #include <xrpl/beast/utility/Journal.h>
 #include <xrpl/core/JobTypes.h>
+#include <xrpl/core/ServiceRegistry.h>
 #include <xrpl/json/json_writer.h>
 
 #include <atomic>
@@ -316,6 +318,10 @@ PerfLogImp::rpcStart(std::string const& method, std::uint64_t const requestId)
     }
     std::lock_guard lock(counters_.methodsMutex_);
     counters_.methods_[requestId] = {counter->first.c_str(), steady_clock::now()};
+
+    // Task 9.4: Record RPC start in OTel metrics pipeline.
+    if (auto* mr = app_.getMetricsRegistry())
+        mr->recordRpcStarted(method);
 }
 
 void
@@ -371,6 +377,10 @@ PerfLogImp::jobQueue(JobType const type)
     }
     std::lock_guard lock(counter->second.mutex);
     ++counter->second.value.queued;
+
+    // Task 9.5: Record job enqueue in OTel metrics pipeline.
+    if (auto* mr = app_.getMetricsRegistry())
+        mr->recordJobQueued(JobTypes::name(type));
 }
 
 void
@@ -397,6 +407,10 @@ PerfLogImp::jobStart(
     std::lock_guard lock(counters_.jobsMutex_);
     if (instance >= 0 && instance < counters_.jobs_.size())
         counters_.jobs_[instance] = {type, startTime};
+
+    // Task 9.5: Record job start in OTel metrics pipeline.
+    if (auto* mr = app_.getMetricsRegistry())
+        mr->recordJobStarted(JobTypes::name(type), dur.count());
 }
 
 void
@@ -419,6 +433,10 @@ PerfLogImp::jobFinish(JobType const type, microseconds dur, int instance)
     std::lock_guard lock(counters_.jobsMutex_);
     if (instance >= 0 && instance < counters_.jobs_.size())
         counters_.jobs_[instance] = {jtINVALID, steady_time_point()};
+
+    // Task 9.5: Record job finish in OTel metrics pipeline.
+    if (auto* mr = app_.getMetricsRegistry())
+        mr->recordJobFinished(JobTypes::name(type), dur.count());
 }
 
 void
diff --git a/src/xrpld/telemetry/MetricsRegistry.cpp b/src/xrpld/telemetry/MetricsRegistry.cpp
new file mode 100644
index 0000000000..99c94efc85
--- /dev/null
+++ b/src/xrpld/telemetry/MetricsRegistry.cpp
@@ -0,0 +1,513 @@
+/** MetricsRegistry implementation — OpenTelemetry metric instruments for rippled.
+
+    This file contains:
+    - Construction / destruction logic for the OTel MeterProvider pipeline.
+    - Synchronous instrument creation (counters, histograms) for RPC and job
+      queue metrics.
+    - Observable gauge callback registration for cache hit rates, TxQ state,
+      CountedObject instances, load factors, and NodeStore I/O and queue depth.
+    - No-op stubs when XRPL_ENABLE_TELEMETRY is not defined.
+*/
+
+// On Windows, OTel's spin_lock_mutex.h (transitively included from
+// MetricsRegistry.h) defines _WINSOCKAPI_ and includes <windows.h>.
+// This poisons the include state for boost/asio/detail/socket_types.hpp,
+// which requires winsock2.h to be included first.  Pre-including the
+// boost/asio socket types header gets winsock2.h in before the OTel
+// headers can interfere.
+#ifdef _MSC_VER
+#include <boost/asio/detail/socket_types.hpp>
+#endif
+
+#include <xrpld/telemetry/MetricsRegistry.h>
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include <xrpld/app/ledger/AcceptedLedger.h>
+#include <xrpld/app/ledger/LedgerMaster.h>
+#include <xrpld/app/ledger/OpenLedger.h>
+#include <xrpld/app/misc/TxQ.h>
+
+#include <xrpl/basics/CountedObject.h>
+#include <xrpl/core/ServiceRegistry.h>
+#include <xrpl/nodestore/Database.h>
+#include <xrpl/server/LoadFeeTrack.h>
+
+#include <opentelemetry/context/context.h>
+#include <opentelemetry/exporters/otlp/otlp_http_metric_exporter_factory.h>
+#include <opentelemetry/exporters/otlp/otlp_http_metric_exporter_options.h>
+#include <opentelemetry/metrics/provider.h>
+#include <opentelemetry/sdk/metrics/export/periodic_exporting_metric_reader_factory.h>
+#include <opentelemetry/sdk/metrics/export/periodic_exporting_metric_reader_options.h>
+#include <opentelemetry/sdk/metrics/meter_provider.h>
+#include <opentelemetry/sdk/metrics/meter_provider_factory.h>
+#include <opentelemetry/sdk/resource/resource.h>
+#include <opentelemetry/sdk/resource/semantic_conventions.h>
+
+namespace metric_sdk = opentelemetry::sdk::metrics;
+namespace otlp_http = opentelemetry::exporter::otlp;
+namespace resource = opentelemetry::sdk::resource;
+
+#endif  // XRPL_ENABLE_TELEMETRY
+
+namespace xrpl {
+namespace telemetry {
+
+MetricsRegistry::MetricsRegistry(bool enabled, ServiceRegistry& app, beast::Journal journal)
+    : enabled_(enabled), app_(app), journal_(journal)
+{
+}
+
+MetricsRegistry::~MetricsRegistry()
+{
+    stop();  // returns early when no provider exists, so an earlier stop() is harmless
+}
+
+// Builds the OTLP/HTTP export pipeline and creates every metric instrument.
+void
+MetricsRegistry::start(std::string const& endpoint, std::string const& instanceId)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    if (!enabled_)
+        return;
+
+    JLOG(journal_.info()) << "MetricsRegistry: starting, endpoint=" << endpoint
+                          << ", instanceId=" << instanceId;
+
+    // Configure OTLP/HTTP metric exporter.
+    otlp_http::OtlpHttpMetricExporterOptions exporterOpts;
+    exporterOpts.url = endpoint;
+    auto exporter = otlp_http::OtlpHttpMetricExporterFactory::Create(exporterOpts);
+
+    // Configure periodic reader: export every 10 s, 5 s timeout per export.
+    metric_sdk::PeriodicExportingMetricReaderOptions readerOpts;
+    readerOpts.export_interval_millis = std::chrono::milliseconds(10000);
+    readerOpts.export_timeout_millis = std::chrono::milliseconds(5000);
+    auto reader =
+        metric_sdk::PeriodicExportingMetricReaderFactory::Create(std::move(exporter), readerOpts);
+
+    // Configure resource attributes so Prometheus exported_instance labels
+    // distinguish metrics from different nodes (matches OTelCollector setup).
+    resource::ResourceAttributes attrs;
+    attrs[resource::SemanticConventions::kServiceName] = "rippled";
+    if (!instanceId.empty())
+        attrs[resource::SemanticConventions::kServiceInstanceId] = instanceId;
+    auto resourceAttrs = resource::Resource::Create(attrs);
+
+    // Create MeterProvider with resource, attach the metric reader, then get
+    // the single meter that all rippled instruments are created from.
+    provider_ = metric_sdk::MeterProviderFactory::Create(
+        std::make_unique<metric_sdk::ViewRegistry>(), resourceAttrs);
+    provider_->AddMetricReader(std::move(reader));
+    meter_ = provider_->GetMeter("rippled", "1.0.0");
+
+    // --- Create synchronous instruments ---
+    // RPC per-method counters and histogram.
+    rpcStartedCounter_ = meter_->CreateUInt64Counter(
+        "rippled_rpc_method_started_total", "Total RPC method calls started");
+    rpcFinishedCounter_ = meter_->CreateUInt64Counter(
+        "rippled_rpc_method_finished_total", "Total RPC method calls completed successfully");
+    rpcErroredCounter_ = meter_->CreateUInt64Counter(
+        "rippled_rpc_method_errored_total", "Total RPC method calls that errored");
+    rpcDurationHistogram_ = meter_->CreateDoubleHistogram(
+        "rippled_rpc_method_duration_us", "RPC method execution time in microseconds");
+
+    // Job queue per-type counters and histograms.
+    jobQueuedCounter_ =
+        meter_->CreateUInt64Counter("rippled_job_queued_total", "Total jobs enqueued");
+    jobStartedCounter_ =
+        meter_->CreateUInt64Counter("rippled_job_started_total", "Total jobs started");
+    jobFinishedCounter_ =
+        meter_->CreateUInt64Counter("rippled_job_finished_total", "Total jobs completed");
+    jobQueuedDurationHistogram_ = meter_->CreateDoubleHistogram(
+        "rippled_job_queued_duration_us", "Time jobs spent waiting in the queue (microseconds)");
+    jobRunningDurationHistogram_ = meter_->CreateDoubleHistogram(
+        "rippled_job_running_duration_us", "Job execution time in microseconds");
+
+    // Register all observable (async) gauges.
+    registerAsyncGauges();
+
+    JLOG(journal_.info()) << "MetricsRegistry: started successfully";
+#else
+    (void)endpoint;
+    (void)instanceId;
+#endif  // XRPL_ENABLE_TELEMETRY
+}
+
+void
+MetricsRegistry::stop()
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    // Nothing to tear down if start() never built a provider or stop() already ran.
+    if (provider_ == nullptr)
+        return;
+
+    JLOG(journal_.info()) << "MetricsRegistry: stopping";
+
+    // Push out whatever is still buffered, then drop the provider.  Its
+    // destruction stops the PeriodicExportingMetricReader, which in turn
+    // stops invoking observable gauge callbacks, so no explicit
+    // RemoveCallback is needed.
+    provider_->ForceFlush();
+    provider_.reset();
+    JLOG(journal_.info()) << "MetricsRegistry: stopped";
+#endif  // XRPL_ENABLE_TELEMETRY
+}
+
+// -----------------------------------------------------------------
+// Synchronous instrument recording — RPC metrics (Task 9.4)
+// -----------------------------------------------------------------
+
+void
+MetricsRegistry::recordRpcStarted(std::string_view method)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    // Count only when export is enabled and start() has created the counter.
+    if (enabled_ && rpcStartedCounter_)
+        rpcStartedCounter_->Add(1, {{"method", std::string(method)}});
+#else
+    (void)method;
+#endif
+}
+
+void
+MetricsRegistry::recordRpcFinished(std::string_view method, std::int64_t durationUs)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    if (!enabled_ || !rpcFinishedCounter_)
+        return;
+    std::string const tag(method);  // one owning copy shared by both instruments
+    rpcFinishedCounter_->Add(1, {{"method", tag}});
+    if (!rpcDurationHistogram_)
+        return;
+    rpcDurationHistogram_->Record(
+        static_cast<double>(durationUs), {{"method", tag}}, opentelemetry::context::Context{});
+#else
+    (void)method;
+    (void)durationUs;
+#endif
+}
+
+void
+MetricsRegistry::recordRpcErrored(std::string_view method, std::int64_t durationUs)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    if (!enabled_ || !rpcErroredCounter_)
+        return;
+    std::string const tag(method);  // one owning copy shared by both instruments
+    rpcErroredCounter_->Add(1, {{"method", tag}});
+    if (!rpcDurationHistogram_)
+        return;
+    rpcDurationHistogram_->Record(
+        static_cast<double>(durationUs), {{"method", tag}}, opentelemetry::context::Context{});
+#else
+    (void)method;
+    (void)durationUs;
+#endif
+}
+
+// -----------------------------------------------------------------
+// Synchronous instrument recording — Job Queue metrics (Task 9.5)
+// -----------------------------------------------------------------
+
+void
+MetricsRegistry::recordJobQueued(std::string_view jobType)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    // Count only when export is enabled and start() has created the counter.
+    if (enabled_ && jobQueuedCounter_)
+        jobQueuedCounter_->Add(1, {{"job_type", std::string(jobType)}});
+#else
+    (void)jobType;
+#endif
+}
+
+void
+MetricsRegistry::recordJobStarted(std::string_view jobType, std::int64_t queuedDurUs)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    if (!enabled_ || !jobStartedCounter_)
+        return;
+    std::string const tag(jobType);  // one owning copy shared by both instruments
+    jobStartedCounter_->Add(1, {{"job_type", tag}});
+    if (!jobQueuedDurationHistogram_)
+        return;
+    jobQueuedDurationHistogram_->Record(
+        static_cast<double>(queuedDurUs), {{"job_type", tag}}, opentelemetry::context::Context{});
+#else
+    (void)jobType;
+    (void)queuedDurUs;
+#endif
+}
+
+void
+MetricsRegistry::recordJobFinished(std::string_view jobType, std::int64_t runningDurUs)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    if (!enabled_ || !jobFinishedCounter_)
+        return;
+    std::string const tag(jobType);  // one owning copy shared by both instruments
+    jobFinishedCounter_->Add(1, {{"job_type", tag}});
+    if (!jobRunningDurationHistogram_)
+        return;
+    jobRunningDurationHistogram_->Record(
+        static_cast<double>(runningDurUs), {{"job_type", tag}}, opentelemetry::context::Context{});
+#else
+    (void)jobType;
+    (void)runningDurUs;
+#endif
+}
+
+// -----------------------------------------------------------------
+// Observable gauge callbacks (Tasks 9.1, 9.2, 9.3, 9.6, 9.7)
+// -----------------------------------------------------------------
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+// Creates the five observable gauges and attaches one collection callback to
+// each.  Every gauge reports several series that differ only by a label
+// ("metric", or "type" for the object-count gauge).  Called once from
+// start(), after meter_ has been obtained.
+void
+MetricsRegistry::registerAsyncGauges()
+{
+    // Every callback below receives `this` as its opaque state pointer.  Each
+    // body is wrapped in a catch-all so that a failing read costs one missed
+    // sample instead of letting an exception escape into the OTel SDK.
+
+    // --- Task 9.2: Cache hit rate and size gauges ---
+    cacheHitRateGauge_ =
+        meter_->CreateDoubleObservableGauge("rippled_cache_metrics", "Cache hit rates and sizes");
+    cacheHitRateGauge_->AddCallback(
+        [](opentelemetry::metrics::ObserverResult result, void* state) {
+            auto* self = static_cast<MetricsRegistry*>(state);
+            auto& app = self->app_;
+
+            try
+            {
+                // Same helper as the other double-valued callbacks: unwrap
+                // the observer and emit one value under a "metric" label.
+                auto observe = [&](char const* name, double value) {
+                    opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
+                        opentelemetry::metrics::ObserverResultT<double>>>(result)
+                        ->Observe(value, {{"metric", name}});
+                };
+
+                // SLE cache hit rate (0.0 - 1.0).
+                observe("SLE_hit_rate", app.cachedSLEs().rate());
+
+                // Ledger and AcceptedLedger cache hit rates.
+                // TaggedCache::getHitRate() returns 0-100; normalize to
+                // 0.0-1.0 so the Grafana panel using "percentunit" renders
+                // correctly.
+                observe("ledger_hit_rate", app.getLedgerMaster().getCacheHitRate() / 100.0);
+                observe("AL_hit_rate", app.getAcceptedLedgerCache().getHitRate() / 100.0);
+
+                // TreeNode cache size and track size.  The cache handle is
+                // fetched once and reused for both reads.
+                auto const treeNodeCache = app.getNodeFamily().getTreeNodeCache();
+                observe("treenode_cache_size", static_cast<double>(treeNodeCache->getCacheSize()));
+                observe("treenode_track_size", static_cast<double>(treeNodeCache->getTrackSize()));
+
+                // FullBelow cache size.
+                observe(
+                    "fullbelow_size",
+                    static_cast<double>(app.getNodeFamily().getFullBelowCache()->size()));
+            }
+            catch (...)  // NOLINT(bugprone-empty-catch)
+            {
+                // Silently skip if services are not yet ready.
+            }
+        },
+        this);
+
+    // --- Task 9.3: TxQ metrics gauges ---
+    txqGauge_ =
+        meter_->CreateDoubleObservableGauge("rippled_txq_metrics", "Transaction queue metrics");
+    txqGauge_->AddCallback(
+        [](opentelemetry::metrics::ObserverResult result, void* state) {
+            auto* self = static_cast<MetricsRegistry*>(state);
+            auto& app = self->app_;
+
+            try
+            {
+                // Snapshot the queue metrics against the current open ledger.
+                auto const metrics = app.getTxQ().getMetrics(*app.openLedger().current());
+
+                auto observe = [&](char const* name, double value) {
+                    opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
+                        opentelemetry::metrics::ObserverResultT<double>>>(result)
+                        ->Observe(value, {{"metric", name}});
+                };
+
+                observe("txq_count", static_cast<double>(metrics.txCount));
+                // txQMaxSize may hold no value; report 0 in that case.
+                observe(
+                    "txq_max_size",
+                    metrics.txQMaxSize ? static_cast<double>(*metrics.txQMaxSize) : 0.0);
+                observe("txq_in_ledger", static_cast<double>(metrics.txInLedger));
+                observe("txq_per_ledger", static_cast<double>(metrics.txPerLedger));
+                observe(
+                    "txq_reference_fee_level",
+                    static_cast<double>(metrics.referenceFeeLevel.fee()));
+                observe(
+                    "txq_min_processing_fee_level",
+                    static_cast<double>(metrics.minProcessingFeeLevel.fee()));
+                observe("txq_med_fee_level", static_cast<double>(metrics.medFeeLevel.fee()));
+                observe(
+                    "txq_open_ledger_fee_level",
+                    static_cast<double>(metrics.openLedgerFeeLevel.fee()));
+            }
+            catch (...)  // NOLINT(bugprone-empty-catch)
+            {
+                // Silently skip if TxQ or OpenLedger are not yet ready.
+            }
+        },
+        this);
+
+    // --- Task 9.6: Counted object instance gauges ---
+    objectCountGauge_ = meter_->CreateInt64ObservableGauge(
+        "rippled_object_count", "Live instance counts for key internal object types");
+    objectCountGauge_->AddCallback(
+        [](opentelemetry::metrics::ObserverResult result, void* /* state */) {
+            // Needs no registry state: CountedObjects is reached through its
+            // own getInstance() accessor.
+            try
+            {
+                // Emit one series per entry returned by getCounts(0),
+                // labelled with the type name.  No further filtering is
+                // applied here, so any type that getCounts() reports is
+                // exported without a change to this callback.
+                auto counts = CountedObjects::getInstance().getCounts(0);
+                for (auto const& [name, count] : counts)
+                {
+                    opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
+                        opentelemetry::metrics::ObserverResultT<int64_t>>>(result)
+                        ->Observe(static_cast<int64_t>(count), {{"type", name}});
+                }
+            }
+            catch (...)  // NOLINT(bugprone-empty-catch)
+            {
+                // Silently skip on error.
+            }
+        },
+        this);
+
+    // --- Task 9.7: Load factor breakdown gauges ---
+    loadFactorGauge_ = meter_->CreateDoubleObservableGauge(
+        "rippled_load_factor_metrics", "Fee load factor breakdown");
+    loadFactorGauge_->AddCallback(
+        [](opentelemetry::metrics::ObserverResult result, void* state) {
+            auto* self = static_cast<MetricsRegistry*>(state);
+            auto& app = self->app_;
+
+            try
+            {
+                auto& feeTrack = app.getFeeTrack();
+                auto const loadBase = static_cast<double>(feeTrack.getLoadBase());
+
+                auto observe = [&](char const* name, double value) {
+                    opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
+                        opentelemetry::metrics::ObserverResultT<double>>>(result)
+                        ->Observe(value, {{"metric", name}});
+                };
+
+                // Server load factor as a multiple of the load base.  Read
+                // once so the "server" series and the combined series
+                // below are derived from the same sample.
+                auto const serverLoadFactor =
+                    static_cast<double>(feeTrack.getLoadFactor()) / loadBase;
+                observe("load_factor_server", serverLoadFactor);
+
+                // Individual factor components.
+                observe(
+                    "load_factor_local", static_cast<double>(feeTrack.getLocalFee()) / loadBase);
+                observe("load_factor_net", static_cast<double>(feeTrack.getRemoteFee()) / loadBase);
+                observe(
+                    "load_factor_cluster",
+                    static_cast<double>(feeTrack.getClusterFee()) / loadBase);
+
+                // Fee escalation factors from TxQ, expressed relative to
+                // the reference fee level.  They are skipped when that
+                // level is zero so we never divide by zero.
+                auto const metrics = app.getTxQ().getMetrics(*app.openLedger().current());
+                auto const refLevel = static_cast<double>(metrics.referenceFeeLevel.fee());
+                double combined = serverLoadFactor;
+                if (refLevel > 0)
+                {
+                    auto const escalation =
+                        static_cast<double>(metrics.openLedgerFeeLevel.fee()) / refLevel;
+                    observe("load_factor_fee_escalation", escalation);
+                    observe(
+                        "load_factor_fee_queue",
+                        static_cast<double>(metrics.minProcessingFeeLevel.fee()) / refLevel);
+
+                    // Combined load factor: the larger of the server
+                    // factor and the open-ledger fee escalation factor.
+                    if (escalation > combined)
+                        combined = escalation;
+                }
+                observe("load_factor", combined);
+            }
+            catch (...)  // NOLINT(bugprone-empty-catch)
+            {
+                // Silently skip if services are not yet ready.
+            }
+        },
+        this);
+
+    // --- Task 9.1: NodeStore I/O gauges ---
+    // The cumulative counters (reads, writes, bytes) are also exposed here
+    // as observable gauges.  This avoids adding an xrpld dependency into the
+    // libxrpl nodestore code — the MetricsRegistry reads the existing atomic
+    // counters from Database via its public accessors.
+    nodeStoreGauge_ = meter_->CreateInt64ObservableGauge(
+        "rippled_nodestore_state", "NodeStore I/O counters, queue depth, and write load");
+    nodeStoreGauge_->AddCallback(
+        [](opentelemetry::metrics::ObserverResult result, void* state) {
+            auto* self = static_cast<MetricsRegistry*>(state);
+            auto& app = self->app_;
+
+            try
+            {
+                auto& db = app.getNodeStore();
+
+                auto observe = [&](char const* name, int64_t value) {
+                    opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
+                        opentelemetry::metrics::ObserverResultT<int64_t>>>(result)
+                        ->Observe(value, {{"metric", name}});
+                };
+
+                // Cumulative counters (monotonically increasing), each cast
+                // to the gauge's int64_t value type.
+                observe("node_reads_total", static_cast<int64_t>(db.getFetchTotalCount()));
+                observe("node_reads_hit", static_cast<int64_t>(db.getFetchHitCount()));
+                observe("node_writes", static_cast<int64_t>(db.getStoreCount()));
+                observe("node_written_bytes", static_cast<int64_t>(db.getStoreSize()));
+                observe("node_read_bytes", static_cast<int64_t>(db.getFetchSize()));
+
+                // Write load score (instantaneous).
+                observe("write_load", static_cast<int64_t>(db.getWriteLoad()));
+
+                // Read queue depth (instantaneous).  It is taken from the
+                // JSON counts report; when the "read_queue" member is
+                // absent no sample is emitted for this collection.
+                Json::Value obj(Json::objectValue);
+                db.getCountsJson(obj);
+                if (obj.isMember("read_queue"))
+                {
+                    observe("read_queue", static_cast<int64_t>(obj["read_queue"].asUInt()));
+                }
+            }
+            catch (...)  // NOLINT(bugprone-empty-catch)
+            {
+                // Silently skip on error.
+            }
+        },
+        this);
+}
+
+#endif  // XRPL_ENABLE_TELEMETRY
+
+}  // namespace telemetry
+}  // namespace xrpl
diff --git a/src/xrpld/telemetry/MetricsRegistry.h b/src/xrpld/telemetry/MetricsRegistry.h
new file mode 100644
index 0000000000..e6d39892b1
--- /dev/null
+++ b/src/xrpld/telemetry/MetricsRegistry.h
@@ -0,0 +1,284 @@
+#pragma once
+
+/** Central OTel Metrics Registry for rippled.
+
+    Owns all OpenTelemetry metric instruments (counters, histograms,
+    observable gauges) that are NOT already covered by the beast::insight
+    StatsD pipeline. The instruments are created once at startup and polled
+    by the OTel PeriodicExportingMetricReader at a configurable interval
+    (default 10 s).
+
+    When XRPL_ENABLE_TELEMETRY is **not** defined, this class compiles to a
+    lightweight no-op: the public methods in the .cpp are empty stubs.
+
+    Dependency / ownership diagram (ASCII):
+
+        Application
+            |
+            +-- MetricsRegistry  (unique_ptr, created in setup(), started/stopped with telemetry)
+                    |
+                    +-- OTel MeterProvider  (owns reader + exporter)
+                    |       |
+                    |       +-- PeriodicExportingMetricReader
+                    |       +-- OtlpHttpMetricExporter
+                    |
+                    +-- Counters / Histograms   (synchronous instruments)
+                    |       +-- rippled_rpc_method_started_total
+                    |       +-- rippled_rpc_method_finished_total
+                    |       +-- rippled_rpc_method_errored_total
+                    |       +-- rippled_rpc_method_duration_us (Histogram)
+                    |       +-- rippled_job_queued_total
+                    |       +-- rippled_job_started_total
+                    |       +-- rippled_job_finished_total
+                    |       +-- rippled_job_queued_duration_us (Histogram)
+                    |       +-- rippled_job_running_duration_us (Histogram)
+                    |
+                    +-- Observable Gauges  (async callbacks, polled by reader)
+                            +-- Cache hit rates  (SLE, ledger, AL)
+                            +-- TreeNode / FullBelow sizes
+                            +-- TxQ metrics
+                            +-- CountedObject counts
+                            +-- Load factor breakdown
+                            +-- NodeStore I/O gauges
+
+    Control-flow for async gauges:
+
+        PeriodicExportingMetricReader (background thread, 10 s tick)
+            |
+            v
+        OTel SDK invokes registered ObservableGauge callbacks
+            |
+            v
+        Each callback reads current value from Application services
+        (e.g. app.getTxQ().getMetrics(), app.getFeeTrack().getLoadFactor())
+            |
+            v
+        Result set is exported via OTLP/HTTP to the collector
+
+    Control-flow for synchronous instruments:
+
+        PerfLogImp::rpcStart/rpcEnd/jobQueue/jobStart/jobFinish
+            |
+            v
+        MetricsRegistry::recordRpc*(method, ...) / recordJob*(type, ...)
+            |
+            v
+        OTel Counter::Add() or Histogram::Record()
+            |
+            v
+        Periodically flushed by the MetricReader
+
+    Example usage:
+
+    @code
+        // In Application::setup(), after telemetry_ is created:
+        metricsRegistry_ = std::make_unique<telemetry::MetricsRegistry>(
+            telemetry_->isEnabled(), app, journal);
+        metricsRegistry_->start(setup.exporterEndpoint);
+
+        // In PerfLogImp::rpcStart():
+        if (auto* mr = app_.getMetricsRegistry())
+            mr->recordRpcStarted("server_info");
+
+        // In PerfLogImp::rpcEnd():
+        if (auto* mr = app_.getMetricsRegistry())
+        {
+            mr->recordRpcFinished("server_info", durationUs);
+            // or: mr->recordRpcErrored("server_info", durationUs);
+        }
+
+        // In PerfLogImp::jobQueue():
+        if (auto* mr = app_.getMetricsRegistry())
+            mr->recordJobQueued("ledgerData");
+
+        // Shutdown:
+        metricsRegistry_->stop();
+    @endcode
+
+    Caveats:
+    - The MetricsRegistry must be created AFTER the Telemetry object because
+      it reads isEnabled() to decide whether to initialize the OTel SDK.
+    - Observable gauge callbacks capture a reference to the Application; the
+      Application must outlive the MetricsRegistry (guaranteed because
+      MetricsRegistry is stopped before Application teardown).
+    - The object_count gauge reports every entry that CountedObjects::getCounts(0)
+      returns; the callback does not restrict itself to a fixed list of types.
+    - Adding a new synchronous instrument requires updating both the header
+      and the .cpp, then calling the new record*() method from the
+      instrumentation site.
+*/
+
+#include <xrpl/beast/utility/Journal.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#ifdef XRPL_ENABLE_TELEMETRY
+#include <opentelemetry/metrics/meter.h>
+#include <opentelemetry/metrics/meter_provider.h>
+#include <opentelemetry/metrics/observer_result.h>
+#include <opentelemetry/sdk/metrics/meter_provider.h>
+#endif
+
+namespace xrpl {
+
+class ServiceRegistry;
+
+namespace telemetry {
+
+class MetricsRegistry
+{
+public:
+    /** Construct a MetricsRegistry.
+
+        @param enabled  Whether OTel metric export is active. When false,
+                        all methods become no-ops.
+        @param app      Reference to the ServiceRegistry (Application) for
+                        reading current metric values in gauge callbacks.
+        @param journal  Journal for log output.
+    */
+    MetricsRegistry(bool enabled, ServiceRegistry& app, beast::Journal journal);
+
+    ~MetricsRegistry();
+
+    /// Non-copyable, non-movable.
+    MetricsRegistry(MetricsRegistry const&) = delete;
+    MetricsRegistry&
+    operator=(MetricsRegistry const&) = delete;
+
+    /** Initialize the OTel metrics pipeline and register all instruments.
+
+        @param endpoint    OTLP/HTTP endpoint URL for metric export
+                           (e.g. "http://localhost:4318/v1/metrics").
+        @param instanceId  Value for the service.instance.id resource
+                           attribute. When non-empty, Prometheus metrics
+                           carry an exported_instance label for per-node
+                           filtering.
+    */
+    void
+    start(std::string const& endpoint, std::string const& instanceId = {});
+
+    /** Flush pending metrics and shut down the pipeline. */
+    void
+    stop();
+
+    /** @return the enabled flag given at construction (not whether start() has run). */
+    bool
+    isEnabled() const noexcept
+    {
+        return enabled_;
+    }
+
+    // -----------------------------------------------------------------
+    // Synchronous instrument recording (called from PerfLog hot paths)
+    // -----------------------------------------------------------------
+
+    /** Record an RPC method call start.
+        @param method  The RPC method name (e.g. "server_info").
+    */
+    void
+    recordRpcStarted(std::string_view method);
+
+    /** Record an RPC method call completion.
+        @param method      The RPC method name.
+        @param durationUs  Execution time in microseconds.
+    */
+    void
+    recordRpcFinished(std::string_view method, std::int64_t durationUs);
+
+    /** Record an RPC method call error.
+        @param method      The RPC method name.
+        @param durationUs  Execution time in microseconds.
+    */
+    void
+    recordRpcErrored(std::string_view method, std::int64_t durationUs);
+
+    /** Record a job enqueued event.
+        @param jobType  The job type name (e.g. "ledgerData").
+    */
+    void
+    recordJobQueued(std::string_view jobType);
+
+    /** Record a job start event.
+        @param jobType        The job type name.
+        @param queuedDurUs   Time the job spent waiting in the queue (us).
+    */
+    void
+    recordJobStarted(std::string_view jobType, std::int64_t queuedDurUs);
+
+    /** Record a job finish event.
+        @param jobType         The job type name.
+        @param runningDurUs   Execution time in microseconds.
+    */
+    void
+    recordJobFinished(std::string_view jobType, std::int64_t runningDurUs);
+
+private:
+    /// Master enable flag; when false all methods are no-ops.
+    bool const enabled_;
+
+    /// Reference to Application services for gauge callbacks.
+    ServiceRegistry& app_;
+
+    /// Journal for logging.
+    beast::Journal const journal_;
+
+#ifdef XRPL_ENABLE_TELEMETRY
+    /// The SDK MeterProvider that owns the export pipeline.
+    std::shared_ptr<opentelemetry::sdk::metrics::MeterProvider> provider_;
+
+    /// The Meter used to create all instruments.
+    opentelemetry::nostd::shared_ptr<opentelemetry::metrics::Meter> meter_;
+
+    // --- Synchronous instruments (RPC) ---
+    /// Counter: rpc_method_started_total{method="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>> rpcStartedCounter_;
+    /// Counter: rpc_method_finished_total{method="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>> rpcFinishedCounter_;
+    /// Counter: rpc_method_errored_total{method="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>> rpcErroredCounter_;
+    /// Histogram: rpc_method_duration_us{method="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Histogram<double>>
+        rpcDurationHistogram_;
+
+    // --- Synchronous instruments (Job Queue) ---
+    /// Counter: job_queued_total{job_type="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>> jobQueuedCounter_;
+    /// Counter: job_started_total{job_type="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>> jobStartedCounter_;
+    /// Counter: job_finished_total{job_type="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>> jobFinishedCounter_;
+    /// Histogram: job_queued_duration_us{job_type="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Histogram<double>>
+        jobQueuedDurationHistogram_;
+    /// Histogram: job_running_duration_us{job_type="<name>"}
+    opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Histogram<double>>
+        jobRunningDurationHistogram_;
+
+    // --- Observable gauges (registered via callbacks) ---
+    // Handles are held here; stop() relies on provider teardown, not RemoveCallback.
+    /// Observable gauges for cache hit rates and sizes.
+    opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
+        cacheHitRateGauge_;
+    /// Observable gauges for TxQ metrics.
+    opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument> txqGauge_;
+    /// Observable gauges for counted object instances.
+    opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
+        objectCountGauge_;
+    /// Observable gauges for load factor breakdown.
+    opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument> loadFactorGauge_;
+    /// Observable gauges for NodeStore I/O counters, write_load and read_queue.
+    opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument> nodeStoreGauge_;
+
+    /** Register all observable gauge callbacks with the OTel SDK.
+        Called once during start().
+    */
+    void
+    registerAsyncGauges();
+#endif  // XRPL_ENABLE_TELEMETRY
+};
+
+}  // namespace telemetry
+}  // namespace xrpl