Phase 4: Consensus tracing - round lifecycle, proposals, validations, close time

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Phase 3: Transaction tracing - protobuf context propagation, PeerImp, NetworkOPs
2026-03-27 07:02:33 +00:00 · 2026-03-24 19:17:59 +00:00 · 2026-03-24 19:17:37 +00:00 · 2026-03-24 19:17:05 +00:00 · 2026-03-24 19:15:00 +00:00 · 2026-03-24 19:14:41 +00:00
71 changed files with 11947 additions and 522 deletions
--- a/.github/scripts/levelization/results/ordering.txt
+++ b/.github/scripts/levelization/results/ordering.txt
@@ -33,6 +33,8 @@ libxrpl.server > xrpl.server
 libxrpl.shamap > xrpl.basics
 libxrpl.shamap > xrpl.protocol
 libxrpl.shamap > xrpl.shamap
+libxrpl.telemetry > xrpl.basics
+libxrpl.telemetry > xrpl.telemetry
 libxrpl.tx > xrpl.basics
 libxrpl.tx > xrpl.conditions
 libxrpl.tx > xrpl.core
@@ -91,6 +93,7 @@ test.csf > xrpl.basics
 test.csf > xrpld.consensus
 test.csf > xrpl.json
 test.csf > xrpl.protocol
+test.csf > xrpl.telemetry
 test.json > test.jtx
 test.json > xrpl.json
 test.jtx > xrpl.basics
@@ -175,10 +178,12 @@ test.toplevel > xrpl.json
 test.unit_test > xrpl.basics
 test.unit_test > xrpl.protocol
 tests.libxrpl > xrpl.basics
+tests.libxrpl > xrpld.telemetry
 tests.libxrpl > xrpl.json
 tests.libxrpl > xrpl.net
 tests.libxrpl > xrpl.protocol
 tests.libxrpl > xrpl.protocol_autogen
+tests.libxrpl > xrpl.telemetry
 xrpl.conditions > xrpl.basics
 xrpl.conditions > xrpl.protocol
 xrpl.core > xrpl.basics
@@ -213,6 +218,7 @@ xrpl.server > xrpl.shamap
 xrpl.shamap > xrpl.basics
 xrpl.shamap > xrpl.nodestore
 xrpl.shamap > xrpl.protocol
+xrpl.telemetry > xrpl.basics
 xrpl.tx > xrpl.basics
 xrpl.tx > xrpl.core
 xrpl.tx > xrpl.ledger
@@ -222,6 +228,7 @@ xrpld.app > xrpl.basics
 xrpld.app > xrpl.core
 xrpld.app > xrpld.consensus
 xrpld.app > xrpld.core
+xrpld.app > xrpld.telemetry
 xrpld.app > xrpl.json
 xrpld.app > xrpl.ledger
 xrpld.app > xrpl.net
@@ -231,10 +238,13 @@ xrpld.app > xrpl.rdb
 xrpld.app > xrpl.resource
 xrpld.app > xrpl.server
 xrpld.app > xrpl.shamap
+xrpld.app > xrpl.telemetry
 xrpld.app > xrpl.tx
 xrpld.consensus > xrpl.basics
+xrpld.consensus > xrpld.telemetry
 xrpld.consensus > xrpl.json
 xrpld.consensus > xrpl.protocol
+xrpld.consensus > xrpl.telemetry
 xrpld.core > xrpl.basics
 xrpld.core > xrpl.core
 xrpld.core > xrpl.json
@@ -245,6 +255,7 @@ xrpld.overlay > xrpl.basics
 xrpld.overlay > xrpl.core
 xrpld.overlay > xrpld.core
 xrpld.overlay > xrpld.peerfinder
+xrpld.overlay > xrpld.telemetry
 xrpld.overlay > xrpl.json
 xrpld.overlay > xrpl.protocol
 xrpld.overlay > xrpl.rdb
@@ -262,6 +273,7 @@ xrpld.perflog > xrpl.json
 xrpld.rpc > xrpl.basics
 xrpld.rpc > xrpl.core
 xrpld.rpc > xrpld.core
+xrpld.rpc > xrpld.telemetry
 xrpld.rpc > xrpl.json
 xrpld.rpc > xrpl.ledger
 xrpld.rpc > xrpl.net
@@ -272,3 +284,4 @@ xrpld.rpc > xrpl.resource
 xrpld.rpc > xrpl.server
 xrpld.rpc > xrpl.tx
 xrpld.shamap > xrpl.shamap
+xrpld.telemetry > xrpl.telemetry
--- a/.github/workflows/reusable-build-test-config.yml
+++ b/.github/workflows/reusable-build-test-config.yml
@@ -101,7 +101,7 @@ jobs:
    steps:
      - name: Cleanup workspace (macOS and Windows)
        if: ${{ runner.os == 'macOS' || runner.os == 'Windows' }}
-        uses: XRPLF/actions/cleanup-workspace@c7d9ce5ebb03c752a354889ecd870cadfc2b1cd4
+        uses: XRPLF/actions/cleanup-workspace@cf0433aa74563aead044a1e395610c96d65a37cf

      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -263,18 +263,6 @@ jobs:
          [ "$COVERAGE_ENABLED" = "true" ] && BUILD_NPROC=$(( BUILD_NPROC - 2 ))
          ./xrpld --unittest --unittest-jobs "${BUILD_NPROC}" 2>&1 | tee unittest.log

-      - name: Show test failure summary
-        if: ${{ failure() && !inputs.build_only }}
-        working-directory: ${{ runner.os == 'Windows' && format('{0}/{1}', env.BUILD_DIR, inputs.build_type) || env.BUILD_DIR }}
-        run: |
-          if [ ! -f unittest.log ]; then
-            echo "unittest.log not found; embedded tests may not have run."
-            exit 0
-          fi
-
-          if ! grep -E "failed" unittest.log; then
-            echo "Log present but no failure lines found in unittest.log."
-          fi
      - name: Debug failure (Linux)
        if: ${{ failure() && runner.os == 'Linux' && !inputs.build_only }}
        run: |
--- a/.github/workflows/reusable-clang-tidy-files.yml
+++ b/.github/workflows/reusable-clang-tidy-files.yml
@@ -78,9 +78,9 @@ jobs:
        id: run_clang_tidy
        continue-on-error: true
        env:
-          TARGETS: ${{ inputs.files != '' && inputs.files || 'src tests' }}
+          FILES: ${{ inputs.files }}
        run: |
-          run-clang-tidy -j ${{ steps.nproc.outputs.nproc }} -p "${BUILD_DIR}" ${TARGETS} 2>&1 | tee clang-tidy-output.txt
+          run-clang-tidy -j ${{ steps.nproc.outputs.nproc }} -p "$BUILD_DIR" $FILES 2>&1 | tee clang-tidy-output.txt

      - name: Upload clang-tidy output
        if: steps.run_clang_tidy.outcome != 'success'
--- a/.github/workflows/upload-conan-deps.yml
+++ b/.github/workflows/upload-conan-deps.yml
@@ -64,7 +64,7 @@ jobs:
    steps:
      - name: Cleanup workspace (macOS and Windows)
        if: ${{ runner.os == 'macOS' || runner.os == 'Windows' }}
-        uses: XRPLF/actions/cleanup-workspace@c7d9ce5ebb03c752a354889ecd870cadfc2b1cd4
+        uses: XRPLF/actions/cleanup-workspace@cf0433aa74563aead044a1e395610c96d65a37cf

      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,6 +117,18 @@ if(rocksdb)
    target_link_libraries(xrpl_libs INTERFACE RocksDB::rocksdb)
 endif()

+# OpenTelemetry distributed tracing (optional).
+# When ON, links against opentelemetry-cpp and defines XRPL_ENABLE_TELEMETRY
+# so that tracing macros in TracingInstrumentation.h are compiled in.
+# When OFF (default), all tracing code compiles to no-ops with zero overhead.
+# Enable via: conan install -o telemetry=True, or cmake -Dtelemetry=ON.
+option(telemetry "Enable OpenTelemetry tracing" OFF)
+if(telemetry)
+    find_package(opentelemetry-cpp CONFIG REQUIRED)
+    add_compile_definitions(XRPL_ENABLE_TELEMETRY)
+    message(STATUS "OpenTelemetry tracing enabled")
+endif()
+
 # Work around changes to Conan recipe for now.
 if(TARGET nudb::core)
    set(nudb nudb::core)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -248,29 +248,6 @@ pip3 install pre-commit
 pre-commit install
 ```

-## Clang-tidy
-
-All code must pass `clang-tidy` checks according to the settings in [`.clang-tidy`](./.clang-tidy).
-
-There is a Continuous Integration job that runs clang-tidy on pull requests. The CI will check:
-
- All changed C++ files (`.cpp`, `.h`, `.ipp`) when only code files are modified
- **All files in the repository** when the `.clang-tidy` configuration file is changed
-
-This ensures that configuration changes don't introduce new warnings across the codebase.
-
-### Running clang-tidy locally
-
-Before running clang-tidy, you must build the project to generate required files (particularly protobuf headers). Refer to [`BUILD.md`](./BUILD.md) for build instructions.
-
-Then run clang-tidy on your local changes:
-
-```
-run-clang-tidy -p build src tests
-```
-
-This will check all source files in the `src` and `tests` directories using the compile commands from your `build` directory.
-
 ## Contracts and instrumentation

 We are using [Antithesis](https://antithesis.com/) for continuous fuzzing,
--- a/OpenTelemetryPlan/00-tracing-fundamentals.md
+++ b/OpenTelemetryPlan/00-tracing-fundamentals.md
@@ -0,0 +1,567 @@
+# Distributed Tracing Fundamentals
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Next**: [Architecture Analysis](./01-architecture-analysis.md)
+
+---
+
+## What is Distributed Tracing?
+
+Distributed tracing is a method for tracking requests and events as they flow through distributed systems. In a network like the XRP Ledger, a single transaction touches multiple independent nodes—each with its own memory and logs, none of them shared. Distributed tracing connects these dots.
+
+**Without tracing:** You see isolated logs on each node with no way to correlate them.
+
+**With tracing:** You see the complete journey of a transaction or an event across all nodes it touched.
+
+---
+
+## Actors and Actions at a Glance
+
+### Actors
+
+| Who (Plain English)                            | Technical Term  |
+| ---------------------------------------------- | --------------- |
+| A single unit of work being tracked            | Span            |
+| The complete journey of a request              | Trace           |
+| Data that links spans across services          | Trace Context   |
+| Code that creates spans and propagates context | Instrumentation |
+| Service that receives and processes traces     | Collector       |
+| Storage and visualization system               | Backend (Tempo) |
+| Decision logic for which traces to keep        | Sampler         |
+
+### Actions
+
+| What Happens (Plain English)            | Technical Term          |
+| --------------------------------------- | ----------------------- |
+| Start tracking a new operation          | Create a Span           |
+| Connect a child operation to its parent | Set `parent_span_id`    |
+| Group all related operations together   | Share a `trace_id`      |
+| Pass tracking data between services     | Context Propagation     |
+| Decide whether to record a trace        | Sampling (Head or Tail) |
+| Send completed traces to storage        | Export (OTLP)           |
+
+---
+
+## Core Concepts
+
+### 1. Trace
+
+A **trace** represents the entire journey of a request through the system. It has a unique `trace_id` that stays constant across all nodes.
+
+```
+Trace ID: abc123
+├── Node A: received transaction
+├── Node B: relayed transaction
+├── Node C: included in consensus
+└── Node D: applied to ledger
+```
+
+### 2. Span
+
+A **span** represents a single unit of work within a trace. Each span has:
+
+| Attribute        | Description                      | Example                    |
+| ---------------- | -------------------------------- | -------------------------- |
+| `trace_id`       | Identifies the trace             | `abc123`                   |
+| `span_id`        | Unique identifier                | `span456`                  |
+| `parent_span_id` | Parent span (if any)             | `p_span123`                |
+| `name`           | Operation name                   | `rpc.submit`               |
+| `start_time`     | When work began (node clock)     | `2024-01-15T10:30:00Z`     |
+| `end_time`       | When work completed (node clock) | `2024-01-15T10:30:00.050Z` |
+| `attributes`     | Key-value metadata               | `tx.hash=ABC...`           |
+| `status`         | UNSET, OK, or ERROR + message    | `OK`                       |
+
+### 3. Trace Context
+
+**Trace context** is the data that propagates between services to link spans together. It contains:
+
+- `trace_id` - The trace this span belongs to
+- `span_id` - The current span (becomes parent for child spans)
+- `trace_flags` - Sampling decisions
+
+---
+
+## How Spans Form a Trace
+
+Spans have parent-child relationships forming a tree structure:
+
+```mermaid
+flowchart TB
+    subgraph trace["Trace: abc123"]
+        A["tx.submit<br/>span_id: 001<br/>50ms"] --> B["tx.validate<br/>span_id: 002<br/>5ms"]
+        A --> C["tx.relay<br/>span_id: 003<br/>10ms"]
+        A --> D["tx.apply<br/>span_id: 004<br/>30ms"]
+        D --> E["ledger.update<br/>span_id: 005<br/>20ms"]
+    end
+
+    style A fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style B fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style C fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style D fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style E fill:#bf360c,stroke:#8c2809,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **tx.submit (blue, root)**: The top-level span representing the entire transaction submission; all other spans are its descendants.
+- **tx.validate, tx.relay, tx.apply (green)**: Direct children of tx.submit, representing the three main stages -- validation, relay to peers, and application to the ledger.
+- **ledger.update (red)**: A grandchild span nested under tx.apply, representing the actual ledger state mutation triggered by applying the transaction.
+- **Arrows (parent to child)**: Each arrow indicates a parent-child span relationship where the parent's completion depends on the child finishing.
+
+The same trace visualized as a **timeline (Gantt chart)**:
+
+```
+Time →   0ms    10ms    20ms    30ms    40ms    50ms
+         ├───────────────────────────────────────────┤
+tx.submit│▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓│
+         ├─────┤
+tx.valid │▓▓▓▓▓│
+         │     ├──────────┤
+tx.relay │     │▓▓▓▓▓▓▓▓▓▓│
+         │               ├────────────────────────────┤
+tx.apply │               │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓│
+         │                         ├──────────────────┤
+ledger   │                         │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓│
+```
+
+---
+
+## Span Relationships
+
+Spans don't always form simple parent-child trees. Distributed tracing defines several relationship types to capture different causal patterns:
+
+### 1. Parent-Child (ChildOf)
+
+The default relationship. The parent span **depends on** or **contains** the child span. The child runs within the scope of the parent.
+
+```
+tx.submit (parent)
+├── tx.validate (child)     ← parent waits for this
+├── tx.relay (child)        ← parent waits for this
+└── tx.apply (child)        ← parent waits for this
+```
+
+**When to use:** Synchronous calls, nested operations, any case where the parent's completion depends on the child.
+
+### 2. Follows-From
+
+A causal relationship where the first span **triggers** the second, but does **not wait** for it. The originator fires and moves on.
+
+```
+Time →
+
+tx.receive [=======]
+                     ↓ triggers (follows-from)
+              tx.relay   [===========]   ← runs independently
+```
+
+**When to use:** Asynchronous jobs, queued work, fire-and-forget patterns. For example, a node receives a transaction and queues it for relay — the relay span _follows from_ the receive span but the receiver doesn't wait for relaying to complete.
+
+> **OpenTracing** defined `FollowsFrom` as a first-class reference type alongside `ChildOf`.
+> **OpenTelemetry** represents this using **Span Links** with descriptive attributes instead (see below).
+
+### 3. Span Links (Cross-Trace and Non-Hierarchical)
+
+Links connect spans that are **causally related but not in a parent-child hierarchy**. Unlike parent-child, links can cross trace boundaries.
+
+```
+Trace A                          Trace B
+──────                           ──────
+batch.schedule                   batch.execute
+├─ item.enqueue (span X)    ┌──► process.item
+├─ item.enqueue (span Y) ───┤    (links to X, Y, Z)
+├─ item.enqueue (span Z)    └──►
+```
+
+**Use cases:**
+
+| Pattern              | Description                                                                 |
+| -------------------- | --------------------------------------------------------------------------- |
+| **Batch processing** | A batch span links back to all individual spans that contributed to it      |
+| **Fan-in**           | An aggregation span links to the multiple producer spans it merges          |
+| **Fan-out**          | Multiple downstream spans link back to the single span that triggered them  |
+| **Async handoff**    | A deferred job links back to the request that queued it (follows-from)      |
+| **Cross-trace**      | Correlating spans across independent traces (e.g., retries, related events) |
+
+**Link structure:** Each link carries the target span's context plus optional attributes:
+
+```
+Link {
+    trace_id:   <target trace>
+    span_id:    <target span>
+    attributes: { "link.description": "triggered by batch scheduler" }
+}
+```
+
+### Relationship Summary
+
+```mermaid
+flowchart LR
+    subgraph parent_child["Parent-Child"]
+        direction TB
+        P["Parent"] --> C["Child"]
+    end
+
+    subgraph follows_from["Follows-From"]
+        direction TB
+        A["Span A"] -.->|triggers| B["Span B"]
+    end
+
+    subgraph links["Span Links"]
+        direction TB
+        X["Span X<br/>(Trace 1)"] -.-|link| Y["Span Y<br/>(Trace 2)"]
+    end
+
+    parent_child ~~~ follows_from ~~~ links
+
+    style P fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style C fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style A fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style B fill:#bf360c,stroke:#8c2809,color:#ffffff
+    style X fill:#4a148c,stroke:#38006b,color:#ffffff
+    style Y fill:#4a148c,stroke:#38006b,color:#ffffff
+```
+
+| Relationship     | Same Trace? | Dependency?                | OTel Mechanism    |
+| ---------------- | ----------- | -------------------------- | ----------------- |
+| **Parent-Child** | Yes         | Parent depends on child    | `parent_span_id`  |
+| **Follows-From** | Usually     | Causal but no dependency   | Link + attributes |
+| **Span Link**    | Either      | Correlation, no dependency | Link + attributes |
+
+---
+
+## Trace ID Generation
+
+A `trace_id` is a 128-bit (16-byte) identifier that groups all spans belonging to one logical operation. How it's generated determines how easily you can find and correlate traces later.
+
+### General Approaches
+
+#### 1. Random (W3C Default)
+
+Generate a random 128-bit ID when a trace starts. Standard approach for most services.
+
+```
+trace_id = random_128_bits()
+```
+
+| Pros                        | Cons                                          |
+| --------------------------- | --------------------------------------------- |
+| Simple, standard            | No natural correlation to domain events       |
+| Guaranteed unique per trace | If propagation is lost, trace is broken       |
+| Works with all OTel tooling | "Find trace for TX abc" requires index lookup |
+
+#### 2. Deterministic (Derived from Domain Data)
+
+Compute the trace_id from a hash of a natural identifier. Every node independently derives the **same** trace_id for the same event.
+
+```
+trace_id = SHA-256(domain_identifier)[0:16]   // truncate to 128 bits
+```
+
+| Pros                                                | Cons                                                       |
+| --------------------------------------------------- | ---------------------------------------------------------- |
+| Propagation-resilient — same ID computed everywhere | Same event processed twice (retry) shares trace_id         |
+| Natural search — domain ID maps directly to trace   | Non-standard (tooling assumes random)                      |
+| No coordination needed between nodes                | 256→128 bit truncation (collision risk negligible at ~2⁶⁴) |
+
+#### 3. Hybrid (Deterministic Prefix + Random Suffix)
+
+First 8 bytes derived from domain data, last 8 bytes random.
+
+```
+trace_id = SHA-256(domain_identifier)[0:8] || random_64_bits()
+```
+
+| Pros                                        | Cons                                     |
+| ------------------------------------------- | ---------------------------------------- |
+| Prefix search: "find all traces for TX abc" | Must propagate to maintain full trace_id |
+| Unique per processing instance              | More complex generation logic            |
+| Retries get distinct trace_ids              | Partial correlation only (prefix match)  |
+
+### XRPL Workflow Analysis
+
+XRPL has a unique advantage: its core workflows produce **globally unique 256-bit hashes** that are known on every node. This makes deterministic trace_id generation practical in ways most systems can't achieve.
+
+#### Natural Identifiers by Workflow
+
+| Workflow            | Natural Identifier                | Size       | Known at Start?               | Same on All Nodes?               |
+| ------------------- | --------------------------------- | ---------- | ----------------------------- | -------------------------------- |
+| **Transaction**     | Transaction hash (`tid_`)         | 256-bit    | Yes — computed after signing  | Yes — hash of canonical tx data  |
+| **Consensus round** | Previous ledger hash + ledger seq | 256+32 bit | Yes — known when round opens  | Yes — all validators agree       |
+| **Validation**      | Ledger hash being validated       | 256-bit    | Yes — from consensus result   | Yes — same closed ledger         |
+| **Ledger catch-up** | Target ledger hash                | 256-bit    | Yes — we know what to fetch   | Yes — identifies ledger globally |
+
+#### Where These Identifiers Live in Code
+
+```
+Transaction:     STTx::getTransactionID()     → uint256 tid_
+                 TMTransaction::rawTransaction → recompute hash from bytes
+
+Consensus:       ConsensusProposal::prevLedger_ → uint256 (previous ledger hash)
+                 ConsensusProposal::position_   → uint256 (TxSet hash)
+                 LedgerHeader::seq              → uint32_t (ledger sequence)
+
+Validation:      STValidation::getLedgerHash()  → uint256
+                 STValidation::getNodeID()      → NodeID (160-bit)
+
+Ledger fetch:    InboundLedger constructor      → uint256 hash, uint32_t seq
+                 TMGetLedger::ledgerHash        → bytes (uint256)
+```
+
+### Recommended Strategy: Workflow-Scoped Deterministic
+
+Each workflow type derives its trace_id from its natural domain identifier:
+
+```
+Transaction trace:   trace_id = SHA-256("tx"    || tx_hash)[0:16]
+Consensus trace:     trace_id = SHA-256("cons"  || prev_ledger_hash || ledger_seq)[0:16]
+Ledger catch-up:     trace_id = SHA-256("fetch" || target_ledger_hash)[0:16]
+```
+
+The string prefix (`"tx"`, `"cons"`, `"fetch"`) prevents collisions between workflows that might share underlying hashes.
+
+**Why this works for XRPL:**
+
+1. **Propagation-resilient** — Even if a P2P message drops trace context, every node independently computes the same trace_id from the same tx_hash or ledger_hash. Spans still correlate.
+
+2. **Zero-cost search** — "Show me the trace for transaction ABC" becomes a direct lookup: compute `SHA-256("tx" || ABC)[0:16]` and query. No secondary index needed.
+
+3. **Cross-workflow linking via Span Links** — A consensus trace links to individual transaction traces. A validation span links to the consensus trace. This connects the full picture without forcing everything into one giant trace.
+
+### Cross-Workflow Correlation
+
+Each workflow gets its own trace. Span Links tie them together:
+
+```mermaid
+flowchart TB
+    subgraph tx_trace["Transaction Trace"]
+        direction LR
+        Tn["trace_id = f(tx_hash)"]:::note --> T1["tx.receive"] --> T2["tx.validate"] --> T3["tx.relay"]
+    end
+
+    subgraph cons_trace["Consensus Trace"]
+        direction LR
+        Cn["trace_id = f(prev_ledger, seq)"]:::note --> C1["cons.open"] --> C2["cons.propose"] --> C3["cons.accept"]
+    end
+
+    subgraph val_trace["Validation"]
+        direction LR
+        Vn["spans within consensus trace"]:::note --> V1["val.create"] --> V2["val.broadcast"]
+    end
+
+    subgraph fetch_trace["Catch-Up Trace"]
+        direction LR
+        Fn["trace_id = f(ledger_hash)"]:::note --> F1["fetch.request"] --> F2["fetch.receive"] --> F3["fetch.apply"]
+    end
+
+    C1 -.-|"span link<br/>(tx traces)"| T3
+    C3 --> V1
+    F1 -.-|"span link<br/>(target ledger)"| C3
+
+    classDef note fill:none,stroke:#888,stroke-dasharray:5 5,color:#333,font-style:italic
+    style T1 fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style T2 fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style T3 fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style C1 fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style C2 fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style C3 fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style V1 fill:#bf360c,stroke:#8c2809,color:#ffffff
+    style V2 fill:#bf360c,stroke:#8c2809,color:#ffffff
+    style F1 fill:#4a148c,stroke:#38006b,color:#ffffff
+    style F2 fill:#4a148c,stroke:#38006b,color:#ffffff
+    style F3 fill:#4a148c,stroke:#38006b,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **Transaction Trace (blue)**: An independent trace whose `trace_id` is deterministically derived from the transaction hash. Contains receive, validate, and relay spans.
+- **Consensus Trace (green)**: An independent trace whose `trace_id` is derived from the previous ledger hash and sequence number. Covers the open, propose, and accept phases.
+- **Validation (red)**: Validation spans live within the consensus trace (not a separate trace). They are created after the accept phase completes.
+- **Catch-Up Trace (purple)**: An independent trace for ledger acquisition, derived from the target ledger hash. Used when a node is behind and fetching missing ledgers.
+- **Dotted arrows (span links)**: Cross-trace correlations. Consensus links to transaction traces it included; catch-up links to the consensus trace that produced the target ledger.
+- **Solid arrow (C3 to V1)**: A parent-child relationship -- validation spans are direct children of the consensus accept span within the same trace.
+
+**How a query flows:**
+
+```
+"Why was TX abc slow?"
+  1. Compute trace_id = SHA-256("tx" || abc)[0:16]
+  2. Find transaction trace → see it was included in consensus round N
+  3. Follow span link → consensus trace for round N
+  4. See which phase was slow (propose? accept?)
+  5. If a node was catching up, follow link → catch-up trace
+```
+
+### Trade-offs to Consider
+
+| Concern                       | Mitigation                                                                                                                    |
+| ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| **Retries get same trace_id** | Add `attempt` attribute to root span; spans have unique span_ids and timestamps                                               |
+| **256→128 bit truncation**    | Birthday-bound collision at ~2⁶⁴ operations — negligible for XRPL's throughput                                                |
+| **Non-standard generation**   | OTel spec allows any 16-byte non-zero value; tooling works on the hex string                                                  |
+| **Hash computation cost**     | SHA-256 is ~0.3μs per call; XRPL already computes these hashes for other purposes                                             |
+| **Late-binding identifiers**  | Ledger hash isn't known until after consensus — validation spans use ledger_seq as fallback, then link to the consensus trace |
+
+---
+
+## Distributed Traces Across Nodes
+
+In distributed systems like rippled, traces span **multiple independent nodes**. The trace context must be propagated in network messages:
+
+```mermaid
+sequenceDiagram
+    participant Client
+    participant NodeA as Node A
+    participant NodeB as Node B
+    participant NodeC as Node C
+
+    Client->>NodeA: Submit TX<br/>(no trace context)
+
+    Note over NodeA: Creates new trace<br/>trace_id: abc123<br/>span: tx.receive
+
+    NodeA->>NodeB: Relay TX<br/>(trace_id: abc123, parent: 001)
+
+    Note over NodeB: Creates child span<br/>span: tx.relay<br/>parent_span_id: 001
+
+    NodeA->>NodeC: Relay TX<br/>(trace_id: abc123, parent: 001)
+
+    Note over NodeC: Creates child span<br/>span: tx.relay<br/>parent_span_id: 001
+
+    Note over NodeA,NodeC: All spans share trace_id: abc123<br/>enabling correlation across nodes
+```
+
+**Reading the diagram:**
+
+- **Client**: The external entity that submits a transaction. It does not carry trace context -- the trace originates at the first node.
+- **Node A**: The entry point that creates a new trace (trace_id: abc123) and the root span `tx.receive`. It relays the transaction to peers with trace context attached.
+- **Node B and Node C**: Peer nodes that receive the relayed transaction along with the propagated trace context. Each creates a child span under Node A's span, preserving the same `trace_id`.
+- **Arrows with trace context**: The relay messages carry `trace_id` and `parent_span_id`, allowing each downstream node to link its spans back to the originating span on Node A.
+
+---
+
+## Context Propagation
+
+For traces to work across nodes, **trace context must be propagated** in messages.
+
+### What's in the Context (~26 bytes)
+
+| Field         | Size     | Description                                             |
+| ------------- | -------- | ------------------------------------------------------- |
+| `trace_id`    | 16 bytes | Identifies the entire trace (constant across all nodes) |
+| `span_id`     | 8 bytes  | The sender's current span (becomes parent on receiver)  |
+| `trace_flags` | 1 byte   | Sampling decision (bit 0 = sampled; bits 1-7 reserved)  |
+| `trace_state` | variable | Optional vendor-specific data (typically omitted)       |
+
+### How span_id Changes at Each Hop
+
+Only **one** `span_id` travels in the context - the sender's current span. Each node:
+
+1. Extracts the received `span_id` and uses it as the `parent_span_id`
+2. Creates a **new** `span_id` for its own span
+3. Sends its own `span_id` as the parent when forwarding
+
+```
+Node A                      Node B                      Node C
+──────                      ──────                      ──────
+
+Span AAA                    Span BBB                    Span CCC
+   │                           │                           │
+   ▼                           ▼                           ▼
+Context out:                Context out:                Context out:
+├─ trace_id: abc123         ├─ trace_id: abc123         ├─ trace_id: abc123
+├─ span_id: AAA ──────────► ├─ span_id: BBB ──────────► ├─ span_id: CCC ──────►
+└─ flags: 01                └─ flags: 01                └─ flags: 01
+                               │                           │
+                          parent = AAA               parent = BBB
+```
+
+The `trace_id` stays constant, but `span_id` **changes at every hop** to maintain the parent-child chain.
+
+### Propagation Formats
+
+Trace context is carried in one of two ways, depending on the transport:
+
+#### HTTP/RPC Headers (W3C Trace Context)
+
+```
+traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01
+             │  │                                │                │
+             │  │                                │                └── Flags (sampled)
+             │  │                                └── Parent span ID (16 hex)
+             │  └── Trace ID (32 hex)
+             └── Version
+```
+
+#### Protocol Buffers (rippled P2P messages)
+
+```protobuf
+message TMTransaction {
+    bytes rawTransaction = 1;
+    // ... existing fields ...
+
+    // Trace context extension
+    bytes trace_parent = 100;  // W3C traceparent
+    bytes trace_state = 101;   // W3C tracestate
+}
+```
+
+---
+
+## Sampling
+
+Not every trace needs to be recorded. **Sampling** reduces overhead:
+
+### Head Sampling (at trace start)
+
+```
+Request arrives → Random 10% chance → Record or skip entire trace
+```
+
+- ✅ Low overhead
+- ❌ May miss interesting traces
+
+### Tail Sampling (after trace completes)
+
+```
+Trace completes → Collector evaluates:
+                  - Error? → KEEP
+                  - Slow? → KEEP
+                  - Normal? → Sample 10%
+```
+
+- ✅ Never loses important traces
+- ❌ Higher memory usage at collector
+
+---
+
+## Key Benefits for rippled
+
+| Challenge                          | How Tracing Helps                        |
+| ---------------------------------- | ---------------------------------------- |
+| "Where is my transaction?"         | Follow trace across all nodes it touched |
+| "Why was consensus slow?"          | See timing breakdown of each phase       |
+| "Which node is the bottleneck?"    | Compare span durations across nodes      |
+| "What happened during the outage?" | Correlate errors across the network      |
+
+---
+
+## Glossary
+
+| Term                 | Definition                                                          |
+| -------------------- | ------------------------------------------------------------------- |
+| **Trace**            | Complete journey of a request, identified by `trace_id`             |
+| **Span**             | Single operation within a trace                                     |
+| **Parent-Child**     | Span relationship where the parent depends on the child             |
+| **Follows-From**     | Causal relationship where originator doesn't wait for the result    |
+| **Span Link**        | Non-hierarchical connection between spans, possibly across traces   |
+| **Deterministic ID** | Trace ID derived from domain data (e.g., tx_hash) instead of random |
+| **Context**          | Data propagated between services (`trace_id`, `span_id`, flags)     |
+| **Instrumentation**  | Code that creates spans and propagates context                      |
+| **Collector**        | Service that receives, processes, and exports traces                |
+| **Backend**          | Storage/visualization system (Tempo)                                |
+| **Head Sampling**    | Sampling decision at trace start                                    |
+| **Tail Sampling**    | Sampling decision after trace completes                             |
+
+---
+
+_Next: [Architecture Analysis](./01-architecture-analysis.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/01-architecture-analysis.md
+++ b/OpenTelemetryPlan/01-architecture-analysis.md
@@ -0,0 +1,467 @@
+# Architecture Analysis
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Related**: [Design Decisions](./02-design-decisions.md) | [Implementation Strategy](./03-implementation-strategy.md)
+
+---
+
+## 1.1 Current rippled Architecture Overview
+
+> **WS** = WebSocket | **UNL** = Unique Node List | **TxQ** = Transaction Queue | **StatsD** = Statistics Daemon
+
+The rippled node software consists of several interconnected components that need instrumentation for distributed tracing:
+
+```mermaid
+flowchart TB
+    subgraph rippled["rippled Node"]
+        subgraph services["Core Services"]
+            RPC["RPC Server<br/>(HTTP/WS/gRPC)"]
+            Overlay["Overlay<br/>(P2P Network)"]
+            Consensus["Consensus<br/>(RCLConsensus)"]
+            ValidatorList["ValidatorList<br/>(UNL Mgmt)"]
+        end
+
+        JobQueue["JobQueue<br/>(Thread Pool)"]
+
+        subgraph processing["Processing Layer"]
+            NetworkOPs["NetworkOPs<br/>(Tx Processing)"]
+            LedgerMaster["LedgerMaster<br/>(Ledger Mgmt)"]
+            NodeStore["NodeStore<br/>(Database)"]
+            InboundLedgers["InboundLedgers<br/>(Ledger Sync)"]
+        end
+
+        subgraph appservices["Application Services"]
+            PathFind["PathFinding<br/>(Payment Paths)"]
+            TxQ["TxQ<br/>(Fee Escalation)"]
+            LoadMgr["LoadManager<br/>(Fee/Load)"]
+        end
+
+        subgraph observability["Existing Observability"]
+            PerfLog["PerfLog<br/>(JSON)"]
+            Insight["Insight<br/>(StatsD)"]
+            Logging["Logging<br/>(Journal)"]
+        end
+
+        services --> JobQueue
+        JobQueue --> processing
+        JobQueue --> appservices
+    end
+
+    style rippled fill:#424242,stroke:#212121,color:#ffffff
+    style services fill:#1565c0,stroke:#0d47a1,color:#ffffff
+    style processing fill:#2e7d32,stroke:#1b5e20,color:#ffffff
+    style appservices fill:#6a1b9a,stroke:#4a148c,color:#ffffff
+    style observability fill:#e65100,stroke:#bf360c,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **Core Services (blue)**: The entry points into rippled -- RPC Server handles client requests, Overlay manages peer-to-peer networking, Consensus drives agreement, and ValidatorList manages trusted validators.
+- **JobQueue (center)**: The asynchronous thread pool that decouples Core Services from the Processing and Application layers. All work flows through it.
+- **Processing Layer (green)**: Core business logic -- NetworkOPs processes transactions, LedgerMaster manages ledger state, NodeStore handles persistence, and InboundLedgers synchronizes missing data.
+- **Application Services (purple)**: Higher-level features -- PathFinding computes payment routes, TxQ manages fee-based queuing, and LoadManager tracks server load.
+- **Existing Observability (orange)**: The current monitoring stack (PerfLog, Insight, Journal logging) that OpenTelemetry will complement, not replace.
+- **Arrows (Services to JobQueue to layers)**: Work originates at Core Services, is enqueued onto the JobQueue, and dispatched to Processing or Application layers for execution.
+
+---
+
+## 1.1.1 Actors and Actions
+
+### Actors
+
+| Who (Plain English)                       | Technical Term             |
+| ----------------------------------------- | -------------------------- |
+| Network node running XRPL software        | rippled node               |
+| External client submitting requests       | RPC Client                 |
+| Network neighbor sharing data             | Peer (PeerImp)             |
+| Request handler for client queries        | RPC Server (ServerHandler) |
+| Command executor for specific RPC methods | RPCHandler                 |
+| Agreement process between nodes           | Consensus (RCLConsensus)   |
+| Transaction processing coordinator        | NetworkOPs                 |
+| Background task scheduler                 | JobQueue                   |
+| Ledger state manager                      | LedgerMaster               |
+| Payment route calculator                  | PathFinding (Pathfinder)   |
+| Transaction waiting room                  | TxQ (Transaction Queue)    |
+| Fee adjustment system                     | LoadManager                |
+| Trusted validator list manager            | ValidatorList              |
+| Protocol upgrade tracker                  | AmendmentTable             |
+| Ledger state hash tree                    | SHAMap                     |
+| Persistent key-value storage              | NodeStore                  |
+
+### Actions
+
+| What Happens (Plain English)                   | Technical Term         |
+| ---------------------------------------------- | ---------------------- |
+| Client sends a request to a node               | `rpc.request`          |
+| Node executes a specific RPC command           | `rpc.command.*`        |
+| Node receives a transaction from a peer        | `tx.receive`           |
+| Node checks if a transaction is valid          | `tx.validate`          |
+| Node forwards a transaction to neighbors       | `tx.relay`             |
+| Nodes agree on which transactions to include   | `consensus.round`      |
+| Consensus progresses through phases            | `consensus.phase.*`    |
+| Node builds a new confirmed ledger             | `ledger.build`         |
+| Node fetches missing ledger data from peers    | `ledger.acquire`       |
+| Node computes payment routes                   | `pathfind.compute`     |
+| Node queues a transaction for later processing | `txq.enqueue`          |
+| Node increases fees due to high load           | `fee.escalate`         |
+| Node fetches the latest trusted validator list | `validator.list.fetch` |
+| Node votes on a protocol amendment             | `amendment.vote`       |
+| Node synchronizes state tree data              | `shamap.sync`          |
+
+---
+
+## 1.2 Key Components for Instrumentation
+
+> **TxQ** = Transaction Queue | **UNL** = Unique Node List
+
+| Component          | Location                                   | Purpose                  | Trace Value                      |
+| ------------------ | ------------------------------------------ | ------------------------ | -------------------------------- |
+| **Overlay**        | `src/xrpld/overlay/`                       | P2P communication        | Message propagation timing       |
+| **PeerImp**        | `src/xrpld/overlay/detail/PeerImp.cpp`     | Individual peer handling | Per-peer latency                 |
+| **RCLConsensus**   | `src/xrpld/app/consensus/RCLConsensus.cpp` | Consensus algorithm      | Round timing, phase analysis     |
+| **NetworkOPs**     | `src/xrpld/app/misc/NetworkOPs.cpp`        | Transaction processing   | Tx lifecycle tracking            |
+| **ServerHandler**  | `src/xrpld/rpc/detail/ServerHandler.cpp`   | RPC entry point          | Request latency                  |
+| **RPCHandler**     | `src/xrpld/rpc/detail/RPCHandler.cpp`      | Command execution        | Per-command timing               |
+| **JobQueue**       | `include/xrpl/core/JobQueue.h`             | Async task execution     | Queue wait times                 |
+| **PathFinding**    | `src/xrpld/app/paths/`                     | Payment path computation | Path latency, cache hits         |
+| **TxQ**            | `src/xrpld/app/misc/TxQ.cpp`               | Transaction queue/fees   | Queue depth, eviction rates      |
+| **LoadManager**    | `src/xrpld/app/main/LoadManager.cpp`       | Fee escalation/load      | Fee levels, load factors         |
+| **InboundLedgers** | `src/xrpld/app/ledger/InboundLedgers.cpp`  | Ledger acquisition       | Sync time, peer reliability      |
+| **ValidatorList**  | `src/xrpld/app/misc/ValidatorList.cpp`     | UNL management           | List freshness, fetch failures   |
+| **AmendmentTable** | `src/xrpld/app/misc/AmendmentTable.cpp`    | Protocol amendments      | Voting status, activation events |
+| **SHAMap**         | `src/libxrpl/shamap/`                      | State hash tree          | Sync speed, missing nodes        |
+
+---
+
+## 1.3 Transaction Flow Diagram
+
+Transaction flow spans multiple nodes in the network. Each node creates linked spans to form a distributed trace:
+
+```mermaid
+sequenceDiagram
+    participant Client
+    participant PeerA as Peer A (Receive)
+    participant PeerB as Peer B (Relay)
+    participant PeerC as Peer C (Validate)
+
+    Client->>PeerA: 1. Submit TX
+
+    rect rgb(230, 245, 255)
+        Note over PeerA: tx.receive SPAN START
+        PeerA->>PeerA: HashRouter Deduplication
+        PeerA->>PeerA: tx.validate (child span)
+    end
+
+    PeerA->>PeerB: 2. Relay TX (with trace ctx)
+
+    rect rgb(230, 245, 255)
+        Note over PeerB: tx.receive (linked span)
+    end
+
+    PeerB->>PeerC: 3. Relay TX
+
+    rect rgb(230, 245, 255)
+        Note over PeerC: tx.receive (linked span)
+        PeerC->>PeerC: tx.process
+    end
+
+    Note over Client,PeerC: DISTRIBUTED TRACE (same trace_id: abc123)
+```
+
+**Reading the diagram:**
+
+- **Client**: The external entity that submits a transaction to Peer A. In this example it supplies no trace context, so the trace starts at the first node (Peer A).
+- **Peer A (Receive)**: The entry node that creates the root span `tx.receive`, runs HashRouter deduplication to avoid processing duplicates, and creates a child `tx.validate` span.
+- **Peer A to Peer B arrow**: The relay message carries trace context (trace_id + parent span_id), enabling Peer B to create a linked span under the same trace.
+- **Peer B (Relay)**: Receives the transaction and trace context, creates a `tx.receive` span linked to Peer A's trace, then relays onward.
+- **Peer C (Validate)**: Final hop in this example. Creates a linked `tx.receive` span and runs `tx.process` to fully process the transaction.
+- **Blue rectangles**: Highlight the span boundaries on each node, showing where instrumentation creates and closes spans.
+
+### Trace Structure
+
+```
+trace_id: abc123
+├── span: tx.receive (Peer A)
+│   ├── span: tx.validate
+│   └── span: tx.relay
+├── span: tx.receive (Peer B) [parent: Peer A]
+│   └── span: tx.relay
+└── span: tx.receive (Peer C) [parent: Peer B]
+    └── span: tx.process
+```
+
+---
+
+## 1.4 Consensus Round Flow
+
+Consensus rounds are multi-phase operations that benefit significantly from tracing:
+
+```mermaid
+flowchart TB
+    subgraph round["consensus.round (root span)"]
+        attrs["Attributes:<br/>xrpl.consensus.ledger.seq = 12345678<br/>xrpl.consensus.mode = proposing<br/>xrpl.consensus.proposers = 35"]
+
+        subgraph open["consensus.phase.open"]
+            open_desc["Duration: ~3s<br/>Waiting for transactions"]
+        end
+
+        subgraph establish["consensus.phase.establish"]
+            est_attrs["proposals_received = 28<br/>disputes_resolved = 3"]
+            est_children["├── consensus.proposal.receive (×28)<br/>├── consensus.proposal.send (×1)<br/>└── consensus.dispute.resolve (×3)"]
+        end
+
+        subgraph accept["consensus.phase.accept"]
+            acc_attrs["transactions_applied = 150<br/>ledger.hash = DEF456..."]
+            acc_children["├── ledger.build<br/>└── ledger.validate"]
+        end
+
+        attrs --> open
+        open --> establish
+        establish --> accept
+    end
+
+    style round fill:#f57f17,stroke:#e65100,color:#ffffff
+    style open fill:#1565c0,stroke:#0d47a1,color:#ffffff
+    style establish fill:#2e7d32,stroke:#1b5e20,color:#ffffff
+    style accept fill:#c2185b,stroke:#880e4f,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **consensus.round (orange, root span)**: The top-level span encompassing the entire consensus round, with attributes like ledger sequence, mode, and proposer count.
+- **consensus.phase.open (blue)**: The first phase where the node waits (~3s) to collect incoming transactions before proposing.
+- **consensus.phase.establish (green)**: The negotiation phase where validators exchange proposals, resolve disputes, and converge on a transaction set. Child spans track each proposal received/sent and each dispute resolved.
+- **consensus.phase.accept (pink)**: The final phase where the agreed transaction set is applied, a new ledger is built, and the ledger is validated. Child spans cover `ledger.build` and `ledger.validate`.
+- **Arrows (open to establish to accept)**: The sequential flow through the three consensus phases. Each phase must complete before the next begins.
+
+---
+
+## 1.5 RPC Request Flow
+
+> **WS** = WebSocket
+
+RPC requests support W3C Trace Context headers for distributed tracing across services:
+
+```mermaid
+flowchart TB
+    subgraph request["rpc.request (root span)"]
+        http["HTTP Request — POST /<br/>traceparent:<br/>00-abc123...-def456...-01"]
+
+        attrs["Attributes:<br/>http.method = POST<br/>net.peer.ip = 192.168.1.100<br/>xrpl.rpc.command = submit"]
+
+        subgraph enqueue["jobqueue.enqueue"]
+            job_attr["xrpl.job.type = jtCLIENT_RPC"]
+        end
+
+        subgraph command["rpc.command.submit"]
+            cmd_attrs["xrpl.rpc.version = 2<br/>xrpl.rpc.role = user"]
+            cmd_children["├── tx.deserialize<br/>├── tx.validate_local<br/>└── tx.submit_to_network"]
+        end
+
+        response["Response: 200 OK<br/>Duration: 45ms"]
+
+        http --> attrs
+        attrs --> enqueue
+        enqueue --> command
+        command --> response
+    end
+
+    style request fill:#2e7d32,stroke:#1b5e20,color:#ffffff
+    style enqueue fill:#1565c0,stroke:#0d47a1,color:#ffffff
+    style command fill:#e65100,stroke:#bf360c,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **rpc.request (green, root span)**: The outermost span representing the full RPC request lifecycle, from HTTP receipt to response. Carries the W3C `traceparent` header for distributed tracing.
+- **HTTP Request node**: Shows the incoming POST request with its `traceparent` header and extracted attributes (method, peer IP, command name).
+- **jobqueue.enqueue (blue)**: The span covering the asynchronous handoff from the RPC thread to the JobQueue worker thread. The trace context is preserved across this async boundary.
+- **rpc.command.submit (orange)**: The span for the actual command execution, with child spans for deserialization, local validation, and network submission.
+- **Response node**: The final output with HTTP status and total duration, marking the end of the root span.
+- **Arrows (top to bottom)**: The sequential processing pipeline -- receive request, extract attributes, enqueue job, execute command, return response.
+
+---
+
+## 1.6 Key Trace Points
+
+> **TxQ** = Transaction Queue
+
+The following table identifies priority instrumentation points across the codebase:
+
+| Category        | Span Name              | File                   | Method                  | Priority |
+| --------------- | ---------------------- | ---------------------- | ----------------------- | -------- |
+| **Transaction** | `tx.receive`           | `PeerImp.cpp`          | `handleTransaction()`   | High     |
+| **Transaction** | `tx.validate`          | `NetworkOPs.cpp`       | `processTransaction()`  | High     |
+| **Transaction** | `tx.process`           | `NetworkOPs.cpp`       | `doTransactionSync()`   | High     |
+| **Transaction** | `tx.relay`             | `OverlayImpl.cpp`      | `relay()`               | Medium   |
+| **Consensus**   | `consensus.round`      | `RCLConsensus.cpp`     | `startRound()`          | High     |
+| **Consensus**   | `consensus.phase.*`    | `Consensus.h`          | `timerEntry()`          | High     |
+| **Consensus**   | `consensus.proposal.*` | `RCLConsensus.cpp`     | `peerProposal()`        | Medium   |
+| **RPC**         | `rpc.request`          | `ServerHandler.cpp`    | `onRequest()`           | High     |
+| **RPC**         | `rpc.command.*`        | `RPCHandler.cpp`       | `doCommand()`           | High     |
+| **Peer**        | `peer.connect`         | `OverlayImpl.cpp`      | `onHandoff()`           | Low      |
+| **Peer**        | `peer.message.*`       | `PeerImp.cpp`          | `onMessage()`           | Low      |
+| **Ledger**      | `ledger.acquire`       | `InboundLedgers.cpp`   | `acquire()`             | Medium   |
+| **Ledger**      | `ledger.build`         | `RCLConsensus.cpp`     | `buildLCL()`            | High     |
+| **PathFinding** | `pathfind.request`     | `PathRequest.cpp`      | `doUpdate()`            | High     |
+| **PathFinding** | `pathfind.compute`     | `Pathfinder.cpp`       | `findPaths()`           | High     |
+| **TxQ**         | `txq.enqueue`          | `TxQ.cpp`              | `apply()`               | High     |
+| **TxQ**         | `txq.apply`            | `TxQ.cpp`              | `processClosedLedger()` | High     |
+| **Fee**         | `fee.escalate`         | `LoadManager.cpp`      | `raiseLocalFee()`       | Medium   |
+| **Ledger**      | `ledger.replay`        | `LedgerReplayer.h`     | `replay()`              | Medium   |
+| **Ledger**      | `ledger.delta`         | `LedgerDeltaAcquire.h` | `processData()`         | Medium   |
+| **Validator**   | `validator.list.fetch` | `ValidatorList.cpp`    | `verify()`              | Medium   |
+| **Validator**   | `validator.manifest`   | `Manifest.cpp`         | `applyManifest()`       | Low      |
+| **Amendment**   | `amendment.vote`       | `AmendmentTable.cpp`   | `doVoting()`            | Low      |
+| **SHAMap**      | `shamap.sync`          | `SHAMap.cpp`           | `fetchRoot()`           | Medium   |
+
+---
+
+## 1.7 Instrumentation Priority
+
+> **TxQ** = Transaction Queue
+
+```mermaid
+quadrantChart
+    title Instrumentation Priority Matrix
+    x-axis Low Complexity --> High Complexity
+    y-axis Low Value --> High Value
+    quadrant-1 Plan Carefully
+    quadrant-2 Implement First
+    quadrant-3 Quick Wins
+    quadrant-4 Consider Later
+
+    RPC Tracing: [0.2, 0.92]
+    Transaction Tracing: [0.55, 0.88]
+    Consensus Tracing: [0.78, 0.82]
+    PathFinding: [0.38, 0.75]
+    TxQ and Fees: [0.25, 0.65]
+    Ledger Sync: [0.62, 0.58]
+    Peer Message Tracing: [0.35, 0.25]
+    JobQueue Tracing: [0.2, 0.48]
+    Validator Mgmt: [0.48, 0.42]
+    Amendment Tracking: [0.15, 0.32]
+    SHAMap Operations: [0.72, 0.45]
+```
+
+---
+
+## 1.8 Observable Outcomes
+
+> **TxQ** = Transaction Queue | **UNL** = Unique Node List
+
+After implementing OpenTelemetry, operators and developers will gain visibility into the following:
+
+### 1.8.1 What You Will See: Traces
+
+| Trace Type                 | Description                                                                                 | Example Query in Grafana/Tempo                                       |
+| -------------------------- | ------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- |
+| **Transaction Lifecycle**  | Full journey from RPC submission through validation, relay, consensus, and ledger inclusion | `{resource.service.name="rippled" && span.xrpl.tx.hash="ABC123..."}` |
+| **Cross-Node Propagation** | Transaction path across multiple rippled nodes with timing                                  | `{span.xrpl.tx.relay_count > 0}`                                     |
+| **Consensus Rounds**       | Complete round with all phases (open, establish, accept)                                    | `{name=~"consensus.round.*"}`                                        |
+| **RPC Request Processing** | Individual command execution with timing breakdown                                          | `{span.xrpl.rpc.command="account_info"}`                             |
+| **Ledger Acquisition**     | Peer-to-peer ledger data requests and responses                                             | `{name="ledger.acquire"}`                                            |
+| **PathFinding Latency**    | Path computation time and cache effectiveness for payment RPCs                              | `{name="pathfind.compute"}`                                          |
+| **TxQ Behavior**           | Queue depth, eviction patterns, fee escalation during congestion                            | `{name=~"txq.*"}`                                                    |
+| **Ledger Sync**            | Full acquisition timeline including delta and transaction fetches                           | `{name=~"ledger.acquire.*"}`                                         |
+| **Validator Health**       | UNL fetch success, manifest updates, stale list detection                                   | `{name=~"validator.*"}`                                              |
+
+### 1.8.2 What You Will See: Metrics (Derived from Traces)
+
+| Metric                        | Description                             | Dashboard Panel             |
+| ----------------------------- | --------------------------------------- | --------------------------- |
+| **RPC Latency (p50/p95/p99)** | Response time distribution per command  | Heatmap by command          |
+| **Transaction Throughput**    | Transactions processed per second       | Time series graph           |
+| **Consensus Round Duration**  | Time to complete consensus phases       | Histogram                   |
+| **Cross-Node Latency**        | Time for transaction to reach N nodes   | Line chart with percentiles |
+| **Error Rate**                | Failed transactions/RPC calls by type   | Stacked bar chart           |
+| **PathFinding Latency**       | Path computation time per currency pair | Heatmap by currency         |
+| **TxQ Depth**                 | Queued transactions over time           | Time series with thresholds |
+| **Fee Escalation Level**      | Current fee multiplier                  | Gauge with alert thresholds |
+| **Ledger Sync Duration**      | Time to acquire missing ledgers         | Histogram                   |
+
+### 1.8.3 Concrete Dashboard Examples
+
+**Transaction Trace View (Tempo):**
+
+```
+┌────────────────────────────────────────────────────────────────────────────────┐
+│ Trace: abc123... (Transaction Submission)                    Duration: 847ms   │
+├────────────────────────────────────────────────────────────────────────────────┤
+│ ├── rpc.request [ServerHandler]                              ████░░░░░░  45ms  │
+│ │   └── rpc.command.submit [RPCHandler]                      ████░░░░░░  42ms  │
+│ │       └── tx.receive [NetworkOPs]                          ███░░░░░░░  35ms  │
+│ │           ├── tx.validate [TxQ]                            █░░░░░░░░░   8ms  │
+│ │           └── tx.relay [Overlay]                           ██░░░░░░░░  15ms  │
+│ │               ├── tx.receive [Node-B]                      █████░░░░░  52ms  │
+│ │               │   └── tx.relay [Node-B]                    ██░░░░░░░░  18ms  │
+│ │               └── tx.receive [Node-C]                      ██████░░░░  65ms  │
+│ └── consensus.round [RCLConsensus]                           ████████░░ 720ms  │
+│     ├── consensus.phase.open                                 ██░░░░░░░░ 180ms  │
+│     ├── consensus.phase.establish                            █████░░░░░ 480ms  │
+│     └── consensus.phase.accept                               █░░░░░░░░░  60ms  │
+└────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**RPC Performance Dashboard Panel:**
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ RPC Command Latency (Last 1 Hour)                           │
+├─────────────────────────────────────────────────────────────┤
+│ Command          │ p50    │ p95    │ p99    │ Errors │ Rate │
+│──────────────────┼────────┼────────┼────────┼────────┼──────│
+│ account_info     │  12ms  │  45ms  │  89ms  │  0.1%  │ 150/s│
+│ submit           │  35ms  │ 120ms  │ 250ms  │  2.3%  │  45/s│
+│ ledger           │   8ms  │  25ms  │  55ms  │  0.0%  │  80/s│
+│ tx               │  15ms  │  50ms  │ 100ms  │  0.5%  │  60/s│
+│ server_info      │   5ms  │  12ms  │  20ms  │  0.0%  │ 200/s│
+└─────────────────────────────────────────────────────────────┘
+```
+
+**Consensus Health Dashboard Panel:**
+
+```mermaid
+---
+config:
+    xyChart:
+        width: 1200
+        height: 400
+        plotReservedSpacePercent: 50
+        chartOrientation: vertical
+    themeVariables:
+        xyChart:
+            plotColorPalette: "#3498db"
+---
+xychart-beta
+    title "Consensus Round Duration (Last 24 Hours)"
+    x-axis "Time of Day (Hours)" [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]
+    y-axis "Duration (seconds)" 1 --> 5
+    line [2.1, 2.4, 2.8, 3.2, 3.8, 4.3, 4.5, 5.0, 4.7, 4.0, 3.2, 2.6, 2.0]
+```
+
+### 1.8.4 Operator Actionable Insights
+
+| Scenario                  | What You'll See                                                              | Action                                           |
+| ------------------------- | ---------------------------------------------------------------------------- | ------------------------------------------------ |
+| **Slow RPC**              | Span showing which phase is slow (parsing, execution, serialization)         | Optimize specific code path                      |
+| **Transaction Stuck**     | Trace stops at validation; error attribute shows reason                      | Fix transaction parameters                       |
+| **Consensus Delay**       | consensus.phase.establish slow; proposers attribute shows missing validators | Investigate network connectivity                 |
+| **Memory Spike**          | Large batch of spans correlating with memory increase                        | Tune batch_size or sampling                      |
+| **Network Partition**     | Traces missing cross-node links for specific peer                            | Check peer connectivity                          |
+| **Path Computation Slow** | pathfind.compute span shows high latency; cache miss rate in attributes      | Warm the RippleLineCache, check order book depth |
+| **TxQ Full**              | txq.enqueue spans show evictions; fee.escalate spans increasing              | Monitor fee levels, alert operators              |
+| **Ledger Sync Stalled**   | ledger.acquire spans timing out; peer reliability attributes show issues     | Check peer connectivity, add trusted peers       |
+| **UNL Stale**             | validator.list.fetch spans failing; last_update attribute aging              | Verify validator site URLs, check DNS            |
+
+### 1.8.5 Developer Debugging Workflow
+
+1. **Find Transaction**: Query by `xrpl.tx.hash` to get full trace
+2. **Identify Bottleneck**: Look at span durations to find slowest component
+3. **Check Attributes**: Review `xrpl.tx.validity`, `xrpl.rpc.status` for errors
+4. **Correlate Logs**: Use `trace_id` to find related PerfLog entries
+5. **Compare Nodes**: Filter by `service.instance.id` to compare behavior across nodes
+
+---
+
+_Next: [Design Decisions](./02-design-decisions.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/02-design-decisions.md
+++ b/OpenTelemetryPlan/02-design-decisions.md
@@ -0,0 +1,627 @@
+# Design Decisions
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Related**: [Architecture Analysis](./01-architecture-analysis.md) | [Code Samples](./04-code-samples.md)
+
+---
+
+## 2.1 OpenTelemetry Components
+
+> **OTLP** = OpenTelemetry Protocol
+
+### 2.1.1 SDK Selection
+
+**Primary Choice**: OpenTelemetry C++ SDK (`opentelemetry-cpp`)
+
+| Component                               | Purpose                | Required    |
+| --------------------------------------- | ---------------------- | ----------- |
+| `opentelemetry-cpp::api`                | Tracing API headers    | Yes         |
+| `opentelemetry-cpp::sdk`                | SDK implementation     | Yes         |
+| `opentelemetry-cpp::ext`                | Extensions (exporters) | Yes         |
+| `opentelemetry-cpp::otlp_grpc_exporter` | OTLP/gRPC export       | Recommended |
+| `opentelemetry-cpp::otlp_http_exporter` | OTLP/HTTP export       | Alternative |
+
+### 2.1.2 Instrumentation Strategy
+
+**Manual Instrumentation** (recommended):
+
+| Approach   | Pros                                                              | Cons                                                    |
+| ---------- | ----------------------------------------------------------------- | ------------------------------------------------------- |
+| **Manual** | Precise control, optimized placement, rippled-specific attributes | More development effort                                 |
+| **Auto**   | Less code, automatic coverage                                     | Less control, potential overhead, limited customization |
+
+---
+
+## 2.2 Exporter Configuration
+
+> **OTLP** = OpenTelemetry Protocol
+
+```mermaid
+flowchart TB
+    subgraph nodes["rippled Nodes"]
+        node1["rippled<br/>Node 1"]
+        node2["rippled<br/>Node 2"]
+        node3["rippled<br/>Node 3"]
+    end
+
+    collector["OpenTelemetry<br/>Collector<br/>(sidecar or standalone)"]
+
+    subgraph backends["Observability Backends"]
+        tempo["Tempo"]
+        elastic["Elastic<br/>APM"]
+    end
+
+    node1 -->|"OTLP/gRPC<br/>:4317"| collector
+    node2 -->|"OTLP/gRPC<br/>:4317"| collector
+    node3 -->|"OTLP/gRPC<br/>:4317"| collector
+
+    collector --> tempo
+    collector --> elastic
+
+    style nodes fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style backends fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style collector fill:#bf360c,stroke:#8c2809,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **rippled Nodes (blue)**: The source of telemetry data. Each rippled node exports spans via OTLP/gRPC on port 4317.
+- **OpenTelemetry Collector (red)**: The central aggregation point that receives spans from all nodes. Can run as a sidecar (per-node) or standalone (shared). Handles batching, filtering, and routing.
+- **Observability Backends (green)**: The storage and visualization destinations. Tempo is the recommended backend for both development and production, and Elastic APM is an alternative. The Collector routes to one or more backends.
+- **Arrows (nodes to collector to backends)**: The data pipeline -- spans flow from nodes to the Collector over gRPC, then the Collector fans out to the configured backends.
+
+### 2.2.1 OTLP/gRPC (Recommended)
+
+```cpp
+// Configuration for OTLP over gRPC
+namespace otlp = opentelemetry::exporter::otlp;
+
+otlp::OtlpGrpcExporterOptions opts;
+opts.endpoint = "localhost:4317";
+opts.use_ssl_credentials = true;
+opts.ssl_credentials_cacert_path = "/path/to/ca.crt";
+```
+
+### 2.2.2 OTLP/HTTP (Alternative)
+
+```cpp
+// Configuration for OTLP over HTTP
+namespace otlp = opentelemetry::exporter::otlp;
+
+otlp::OtlpHttpExporterOptions opts;
+opts.url = "http://localhost:4318/v1/traces";
+opts.content_type = otlp::HttpRequestContentType::kJson;  // or kBinary
+```
+
+---
+
+## 2.3 Span Naming Conventions
+
+> **TxQ** = Transaction Queue | **UNL** = Unique Node List | **WS** = WebSocket
+
+### 2.3.1 Naming Schema
+
+```
+<component>.<operation>[.<sub-operation>]
+```
+
+**Examples**:
+
+- `tx.receive` - Transaction received from peer
+- `consensus.phase.establish` - Consensus establish phase
+- `rpc.command.server_info` - server_info RPC command
+
+### 2.3.2 Complete Span Catalog
+
+```yaml
+# Transaction Spans
+tx:
+  receive: "Transaction received from network"
+  validate: "Transaction signature/format validation"
+  process: "Full transaction processing"
+  relay: "Transaction relay to peers"
+  apply: "Apply transaction to ledger"
+
+# Consensus Spans
+consensus:
+  round: "Complete consensus round"
+  phase:
+    open: "Open phase - collecting transactions"
+    establish: "Establish phase - reaching agreement"
+    accept: "Accept phase - applying consensus"
+  proposal:
+    receive: "Receive peer proposal"
+    send: "Send our proposal"
+  validation:
+    receive: "Receive peer validation"
+    send: "Send our validation"
+
+# RPC Spans
+rpc:
+  request: "HTTP/WebSocket request handling"
+  command:
+    "*": "Specific RPC command (dynamic)"
+
+# Peer Spans
+peer:
+  connect: "Peer connection establishment"
+  disconnect: "Peer disconnection"
+  message:
+    send: "Send protocol message"
+    receive: "Receive protocol message"
+
+# Ledger Spans
+ledger:
+  acquire: "Ledger acquisition from network"
+  build: "Build new ledger"
+  validate: "Ledger validation"
+  close: "Close ledger"
+  replay: "Ledger replay executed"
+  delta: "Delta-based ledger acquired"
+
+# PathFinding Spans
+pathfind:
+  request: "Path request initiated"
+  compute: "Path computation executed"
+
+# TxQ Spans
+txq:
+  enqueue: "Transaction queued"
+  apply: "Queued transaction applied"
+
+# Fee/Load Spans
+fee:
+  escalate: "Fee escalation triggered"
+
+# Validator Spans
+validator:
+  list:
+    fetch: "UNL list fetched"
+  manifest: "Manifest update processed"
+
+# Amendment Spans
+amendment:
+  vote: "Amendment voting executed"
+
+# SHAMap Spans
+shamap:
+  sync: "State tree synchronization"
+
+# Job Spans
+job:
+  enqueue: "Job added to queue"
+  execute: "Job execution"
+```
+
+---
+
+## 2.4 Attribute Schema
+
+> **TxQ** = Transaction Queue | **UNL** = Unique Node List | **OTLP** = OpenTelemetry Protocol
+
+### 2.4.1 Resource Attributes (Set Once at Startup)
+
+```cpp
+// Standard OpenTelemetry semantic conventions
+resource::SemanticConventions::SERVICE_NAME        = "rippled"
+resource::SemanticConventions::SERVICE_VERSION     = BuildInfo::getVersionString()
+resource::SemanticConventions::SERVICE_INSTANCE_ID = <node_public_key_base58>
+
+// Custom rippled attributes
+"xrpl.network.id"      = <network_id>           // e.g., 0 for mainnet
+"xrpl.network.type"    = "mainnet" | "testnet" | "devnet" | "standalone"
+"xrpl.node.type"       = "validator" | "stock" | "reporting"
+"xrpl.node.cluster"    = <cluster_name>         // If clustered
+```
+
+### 2.4.2 Span Attributes by Category
+
+#### Transaction Attributes
+
+```cpp
+"xrpl.tx.hash"         = string   // Transaction hash (hex)
+"xrpl.tx.type"         = string   // "Payment", "OfferCreate", etc.
+"xrpl.tx.account"      = string   // Source account (redacted in prod)
+"xrpl.tx.sequence"     = int64    // Account sequence number
+"xrpl.tx.fee"          = int64    // Fee in drops
+"xrpl.tx.result"       = string   // "tesSUCCESS", "tecPATH_DRY", etc.
+"xrpl.tx.ledger_index" = int64    // Ledger containing transaction
+```
+
+#### Consensus Attributes
+
+```cpp
+"xrpl.consensus.round"          = int64    // Round number
+"xrpl.consensus.phase"          = string   // "open", "establish", "accept"
+"xrpl.consensus.mode"           = string   // "proposing", "observing", etc.
+"xrpl.consensus.proposers"      = int64    // Number of proposers
+"xrpl.consensus.ledger.prev"    = string   // Previous ledger hash
+"xrpl.consensus.ledger.seq"     = int64    // Ledger sequence
+"xrpl.consensus.tx_count"       = int64    // Transactions in consensus set
+"xrpl.consensus.duration_ms"    = float64  // Round duration
+
+// Phase 4a: Establish-phase gap fill & cross-node correlation
+"xrpl.consensus.round_id"          = int64    // Consensus round number
+"xrpl.consensus.ledger_id"         = string   // previousLedger.id() — shared across nodes
+"xrpl.consensus.trace_strategy"    = string   // "deterministic" or "attribute"
+"xrpl.consensus.converge_percent"  = int64    // Convergence % (0-100+)
+"xrpl.consensus.establish_count"   = int64    // Number of establish iterations
+"xrpl.consensus.disputes_count"    = int64    // Active disputed transactions
+"xrpl.consensus.proposers_agreed"  = int64    // Peers agreeing with our position
+"xrpl.consensus.proposers_total"   = int64    // Total peer positions
+"xrpl.consensus.agree_count"       = int64    // Peers that agree (haveConsensus)
+"xrpl.consensus.disagree_count"    = int64    // Peers that disagree
+"xrpl.consensus.threshold_percent" = int64    // Current threshold (50/65/70/95)
+"xrpl.consensus.result"            = string   // "yes", "no", "moved_on"
+"xrpl.consensus.mode.old"          = string   // Previous consensus mode
+"xrpl.consensus.mode.new"          = string   // New consensus mode
+```
+
+#### RPC Attributes
+
+```cpp
+"xrpl.rpc.command"     = string   // Command name
+"xrpl.rpc.version"     = int64    // API version
+"xrpl.rpc.role"        = string   // "admin" or "user"
+"xrpl.rpc.params"      = string   // Sanitized parameters (optional)
+```
+
+#### Peer & Message Attributes
+
+```cpp
+"xrpl.peer.id"            = string   // Peer public key (base58)
+"xrpl.peer.address"       = string   // IP:port
+"xrpl.peer.latency_ms"    = float64  // Measured latency
+"xrpl.peer.cluster"       = string   // Cluster name if clustered
+"xrpl.message.type"       = string   // Protocol message type name
+"xrpl.message.size_bytes" = int64    // Message size
+"xrpl.message.compressed" = bool     // Whether compressed
+```
+
+#### Ledger & Job Attributes
+
+```cpp
+"xrpl.ledger.hash"       = string   // Ledger hash
+"xrpl.ledger.index"      = int64    // Ledger sequence/index
+"xrpl.ledger.close_time" = int64    // Close time (epoch)
+"xrpl.ledger.tx_count"   = int64    // Transaction count
+"xrpl.job.type"          = string   // Job type name
+"xrpl.job.queue_ms"      = float64  // Time spent in queue
+"xrpl.job.worker"        = int64    // Worker thread ID
+```
+
+#### PathFinding Attributes
+
+```cpp
+"xrpl.pathfind.source_currency"  = string   // Source currency code
+"xrpl.pathfind.dest_currency"    = string   // Destination currency code
+"xrpl.pathfind.path_count"       = int64    // Number of paths found
+"xrpl.pathfind.cache_hit"        = bool     // RippleLineCache hit
+```
+
+#### TxQ Attributes
+
+```cpp
+"xrpl.txq.queue_depth"      = int64    // Current queue depth
+"xrpl.txq.fee_level"        = int64    // Fee level of transaction
+"xrpl.txq.eviction_reason"  = string   // Why transaction was evicted
+```
+
+#### Fee Attributes
+
+```cpp
+"xrpl.fee.load_factor"      = int64    // Current load factor
+"xrpl.fee.escalation_level" = int64    // Fee escalation multiplier
+```
+
+#### Validator Attributes
+
+```cpp
+"xrpl.validator.list_size"    = int64    // UNL size
+"xrpl.validator.list_age_sec" = int64    // Seconds since last update
+```
+
+#### Amendment Attributes
+
+```cpp
+"xrpl.amendment.name"         = string   // Amendment name
+"xrpl.amendment.status"       = string   // "enabled", "vetoed", "supported"
+```
+
+#### SHAMap Attributes
+
+```cpp
+"xrpl.shamap.type"            = string   // "transaction", "state", "account_state"
+"xrpl.shamap.missing_nodes"   = int64    // Number of missing nodes during sync
+"xrpl.shamap.duration_ms"     = float64  // Sync duration
+```
+
+### 2.4.3 Data Collection Summary
+
+The following table summarizes what data is collected by category:
+
+| Category        | Attributes Collected                                                   | Purpose                      |
+| --------------- | ---------------------------------------------------------------------- | ---------------------------- |
+| **Transaction** | `tx.hash`, `tx.type`, `tx.result`, `tx.fee`, `ledger_index`            | Trace transaction lifecycle  |
+| **Consensus**   | `round`, `phase`, `mode`, `proposers` (count), `duration_ms`           | Analyze consensus timing     |
+| **RPC**         | `command`, `version`, `status`, `duration_ms`                          | Monitor RPC performance      |
+| **Peer**        | `peer.id` (public key), `latency_ms`, `message.type`, `message.size`   | Network topology analysis    |
+| **Ledger**      | `ledger.hash`, `ledger.index`, `close_time`, `tx_count`                | Ledger progression tracking  |
+| **Job**         | `job.type`, `queue_ms`, `worker`                                       | JobQueue performance         |
+| **PathFinding** | `pathfind.source_currency`, `dest_currency`, `path_count`, `cache_hit` | Payment path analysis        |
+| **TxQ**         | `txq.queue_depth`, `fee_level`, `eviction_reason`                      | Queue depth and fee tracking |
+| **Fee**         | `fee.load_factor`, `escalation_level`                                  | Fee escalation monitoring    |
+| **Validator**   | `validator.list_size`, `list_age_sec`                                  | UNL health monitoring        |
+| **Amendment**   | `amendment.name`, `status`                                             | Protocol upgrade tracking    |
+| **SHAMap**      | `shamap.type`, `missing_nodes`, `duration_ms`                          | State tree sync performance  |
+
+### 2.4.4 Privacy & Sensitive Data Policy
+
+> **PII** = Personally Identifiable Information
+
+OpenTelemetry instrumentation is designed to collect **operational metadata only**, never sensitive content.
+
+#### Data NOT Collected
+
+The following data is explicitly **excluded** from telemetry collection:
+
+| Excluded Data           | Reason                                    |
+| ----------------------- | ----------------------------------------- |
+| **Private Keys**        | Never exposed; not relevant to tracing    |
+| **Account Balances**    | Financial data; privacy sensitive         |
+| **Transaction Amounts** | Financial data; privacy sensitive         |
+| **Raw TX Payloads**     | May contain sensitive memo/data fields    |
+| **Personal Data**       | No PII collected                          |
+| **IP Addresses**        | Configurable; excluded by default in prod |
+
+#### Privacy Protection Mechanisms
+
+| Mechanism                     | Description                                                               |
+| ----------------------------- | ------------------------------------------------------------------------- |
+| **Account Hashing**           | `xrpl.tx.account` is hashed at collector level before storage             |
+| **Configurable Redaction**    | Sensitive fields can be excluded via `[telemetry]` config section         |
+| **Sampling**                  | Only 10% of traces recorded by default, reducing data exposure            |
+| **Local Control**             | Node operators have full control over what gets exported                  |
+| **No Raw Payloads**           | Transaction content is never recorded, only metadata (hash, type, result) |
+| **Collector-Level Filtering** | Additional redaction/hashing can be configured at OTel Collector          |
+
+#### Collector-Level Data Protection
+
+The OpenTelemetry Collector can be configured to hash or redact sensitive attributes before export:
+
+```yaml
+processors:
+  attributes:
+    actions:
+      # Hash account addresses before storage
+      - key: xrpl.tx.account
+        action: hash
+      # Remove IP addresses entirely
+      - key: xrpl.peer.address
+        action: delete
+      # Redact specific fields
+      - key: xrpl.rpc.params
+        action: delete
+```
+
+#### Configuration Options for Privacy
+
+In `rippled.cfg`, operators can control data collection granularity:
+
+```ini
+[telemetry]
+enabled=1
+
+# Disable collection of specific components
+trace_transactions=1
+trace_consensus=1
+trace_rpc=1
+trace_peer=0          # Disable peer tracing (high volume, includes addresses)
+
+# Redact specific attributes
+redact_account=1      # Hash account addresses before export
+redact_peer_address=1 # Remove peer IP addresses
+```
+
+> **Note**: The `redact_account` configuration in `rippled.cfg` controls SDK-level redaction before export, while collector-level filtering (see [Collector-Level Data Protection](#collector-level-data-protection) above) provides an additional defense-in-depth layer. Both can operate independently.
+
+> **Key Principle**: Telemetry collects **operational metadata** (timing, counts, hashes) — never **sensitive content** (keys, balances, amounts, raw payloads).
+
+---
+
+## 2.5 Context Propagation Design
+
+> **WS** = WebSocket
+
+### 2.5.1 Propagation Boundaries
+
+```mermaid
+flowchart TB
+    subgraph http["HTTP/WebSocket (RPC)"]
+        w3c["W3C Trace Context Headers:<br/>traceparent:<br/>00-trace_id-span_id-flags<br/>tracestate: rippled=..."]
+    end
+
+    subgraph protobuf["Protocol Buffers (P2P)"]
+        proto["message TraceContext {<br/>  bytes trace_id = 1;  // 16 bytes<br/>  bytes span_id = 2;   // 8 bytes<br/>  uint32 trace_flags = 3;<br/>  string trace_state = 4;<br/>}"]
+    end
+
+    subgraph jobqueue["JobQueue (Internal Async)"]
+        job["Context captured at job creation,<br/>restored at execution<br/><br/>class Job {<br/>  otel::context::Context<br/>    traceContext_;<br/>};"]
+    end
+
+    style http fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style protobuf fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style jobqueue fill:#bf360c,stroke:#8c2809,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **HTTP/WebSocket - RPC (blue)**: For client-facing RPC requests, trace context is propagated using the W3C `traceparent` header. This is the standard approach and works with any OTel-compatible client.
+- **Protocol Buffers - P2P (green)**: For peer-to-peer messages between rippled nodes, trace context is embedded as a protobuf `TraceContext` message carrying trace_id, span_id, flags, and optional trace_state.
+- **JobQueue - Internal Async (red)**: For asynchronous work within a single node, the OTel context is captured when a job is created and restored when the job executes on a worker thread. This bridges the async gap so spans remain linked.
+
+---
+
+## 2.6 Integration with Existing Observability
+
+> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
+
+### 2.6.1 Existing Frameworks Comparison
+
+rippled already has two observability mechanisms. OpenTelemetry complements them; it does not replace them:
+
+| Aspect                | PerfLog                       | Beast Insight (StatsD)       | OpenTelemetry             |
+| --------------------- | ----------------------------- | ---------------------------- | ------------------------- |
+| **Type**              | Logging                       | Metrics                      | Distributed Tracing       |
+| **Data**              | JSON log entries              | Counters, gauges, histograms | Spans with context        |
+| **Scope**             | Single node                   | Single node                  | **Cross-node**            |
+| **Output**            | `perf.log` file               | StatsD server                | OTLP Collector            |
+| **Question answered** | "What happened on this node?" | "How many? How fast?"        | "What was the journey?"   |
+| **Correlation**       | By timestamp                  | By metric name               | By `trace_id`             |
+| **Overhead**          | Low (file I/O)                | Low (UDP packets)            | Low-Medium (configurable) |
+
+### 2.6.2 What Each Framework Does Best
+
+#### PerfLog
+
+- **Purpose**: Detailed local event logging for RPC and job execution
+- **Strengths**:
+  - Rich JSON output with timing data
+  - Already integrated in RPC handlers
+  - File-based, no external dependencies
+- **Limitations**:
+  - Single-node only (no cross-node correlation)
+  - No parent-child relationships between events
+  - Manual log parsing required
+
+```json
+// Example PerfLog entry
+{
+  "time": "2024-01-15T10:30:00.123Z",
+  "method": "submit",
+  "duration_us": 1523,
+  "result": "tesSUCCESS"
+}
+```
+
+#### Beast Insight (StatsD)
+
+- **Purpose**: Real-time metrics for monitoring dashboards
+- **Strengths**:
+  - Aggregated metrics (counters, gauges, histograms)
+  - Low overhead (UDP, fire-and-forget)
+  - Good for alerting thresholds
+- **Limitations**:
+  - No request-level detail
+  - No causal relationships
+  - Single-node perspective
+
+```cpp
+// Example StatsD usage in rippled
+insight.increment("rpc.submit.count");
+insight.gauge("ledger.age", age);
+insight.timing("consensus.round", duration);
+```
+
+#### OpenTelemetry (NEW)
+
+- **Purpose**: Distributed request tracing across nodes
+- **Strengths**:
+  - **Cross-node correlation** via `trace_id`
+  - Parent-child span relationships
+  - Rich attributes per span
+  - Industry standard (CNCF)
+- **Limitations**:
+  - Requires collector infrastructure
+  - Higher complexity than logging
+
+```cpp
+// Example OpenTelemetry span
+auto span = telemetry.startSpan("tx.relay");
+span->SetAttribute("xrpl.tx.hash", hash);
+span->SetAttribute("xrpl.peer.id", peerId);
+// Span automatically linked to parent via context
+```
+
+### 2.6.3 When to Use Each
+
+| Scenario                                | PerfLog    | StatsD | OpenTelemetry |
+| --------------------------------------- | ---------- | ------ | ------------- |
+| "How many TXs per second?"              | ❌         | ✅     | ✅            |
+| "What's the p99 RPC latency?"           | ❌         | ✅     | ✅            |
+| "Why was this specific TX slow?"        | ⚠️ partial | ❌     | ✅            |
+| "Which node delayed consensus?"         | ❌         | ❌     | ✅            |
+| "What happened on node X at time T?"    | ✅         | ❌     | ✅            |
+| "Show me the TX journey across 5 nodes" | ❌         | ❌     | ✅            |
+
+### 2.6.4 Coexistence Strategy
+
+```mermaid
+flowchart TB
+    subgraph rippled["rippled Process"]
+        perflog["PerfLog<br/>(JSON to file)"]
+        insight["Beast Insight<br/>(StatsD)"]
+        otel["OpenTelemetry<br/>(Tracing)"]
+    end
+
+    perflog --> perffile["perf.log"]
+    insight --> statsd["StatsD Server"]
+    otel --> collector["OTLP Collector"]
+
+    perffile --> grafana["Grafana<br/>(Unified UI)"]
+    statsd --> grafana
+    collector --> grafana
+
+    style rippled fill:#212121,stroke:#0a0a0a,color:#ffffff
+    style grafana fill:#bf360c,stroke:#8c2809,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **rippled Process (dark gray)**: The single rippled node running all three observability frameworks side by side. Each framework operates independently with no interference.
+- **PerfLog to perf.log**: PerfLog writes JSON-formatted event logs to a local file. Grafana can ingest these via Loki or a file-based datasource.
+- **Beast Insight to StatsD Server**: Insight sends aggregated metrics (counters, gauges) over UDP to a StatsD server. Grafana reads from StatsD-compatible backends like Graphite or Prometheus (via StatsD exporter).
+- **OpenTelemetry to OTLP Collector**: OTel exports spans over OTLP/gRPC to a Collector, which then forwards to a trace backend (Tempo).
+- **Grafana (red, unified UI)**: All three data streams converge in Grafana, enabling operators to correlate logs, metrics, and traces in a single dashboard.
+
+### 2.6.5 Correlation with PerfLog
+
+Trace IDs can be correlated with existing PerfLog entries for comprehensive debugging:
+
+```cpp
+// In RPCHandler.cpp - correlate trace with PerfLog
+Status doCommand(RPC::JsonContext& context, Json::Value& result)
+{
+    // Start OpenTelemetry span
+    auto span = context.app.getTelemetry().startSpan(
+        "rpc.command." + context.method);
+
+    // Get trace ID for correlation
+    auto traceId = span->GetContext().trace_id().IsValid()
+        ? toHex(span->GetContext().trace_id())
+        : "";
+
+    // Use existing PerfLog with trace correlation
+    auto const curId = context.app.getPerfLog().currentId();
+    context.app.getPerfLog().rpcStart(context.method, curId);
+
+    // Future: Add trace ID to PerfLog entry
+    // context.app.getPerfLog().setTraceId(curId, traceId);
+
+    try {
+        auto ret = handler(context, result);
+        context.app.getPerfLog().rpcFinish(context.method, curId);
+        span->SetStatus(opentelemetry::trace::StatusCode::kOk);
+        return ret;
+    } catch (std::exception const& e) {
+        context.app.getPerfLog().rpcError(context.method, curId);
+        span->RecordException(e);
+        span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
+        throw;
+    }
+}
+```
+
+---
+
+_Previous: [Architecture Analysis](./01-architecture-analysis.md)_ | _Next: [Implementation Strategy](./03-implementation-strategy.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/03-implementation-strategy.md
+++ b/OpenTelemetryPlan/03-implementation-strategy.md
@@ -0,0 +1,528 @@
+# Implementation Strategy
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Related**: [Code Samples](./04-code-samples.md) | [Configuration Reference](./05-configuration-reference.md)
+
+---
+
+## 3.1 Directory Structure
+
+The telemetry implementation follows rippled's existing code organization pattern:
+
+```
+include/xrpl/
+├── telemetry/
+│   ├── Telemetry.h              # Main telemetry interface
+│   ├── TelemetryConfig.h        # Configuration structures
+│   ├── TraceContext.h           # Context propagation utilities
+│   ├── SpanGuard.h              # RAII span management
+│   └── SpanAttributes.h         # Attribute helper functions
+
+src/libxrpl/
+├── telemetry/
+│   ├── Telemetry.cpp            # Implementation
+│   ├── TelemetryConfig.cpp      # Config parsing
+│   ├── TraceContext.cpp         # Context serialization
+│   └── NullTelemetry.cpp        # No-op implementation
+
+src/xrpld/
+├── telemetry/
+│   ├── TracingInstrumentation.h # Instrumentation macros
+│   └── TracingInstrumentation.cpp
+```
+
+---
+
+## 3.2 Implementation Approach
+
+<div align="center">
+
+```mermaid
+%%{init: {'flowchart': {'nodeSpacing': 20, 'rankSpacing': 30}}}%%
+flowchart TB
+    subgraph phase1["Phase 1: Core"]
+        direction LR
+        sdk["SDK Integration"] ~~~ interface["Telemetry Interface"] ~~~ config["Configuration"]
+    end
+
+    subgraph phase2["Phase 2: RPC"]
+        direction LR
+        http["HTTP Context"] ~~~ rpc["RPC Handlers"]
+    end
+
+    subgraph phase3["Phase 3: P2P"]
+        direction LR
+        proto["Protobuf Context"] ~~~ tx["Transaction Relay"]
+    end
+
+    subgraph phase4["Phase 4: Consensus"]
+        direction LR
+        consensus["Consensus Rounds"] ~~~ proposals["Proposals"]
+    end
+
+    phase1 --> phase2 --> phase3 --> phase4
+
+    style phase1 fill:#1565c0,stroke:#0d47a1,color:#ffffff
+    style phase2 fill:#2e7d32,stroke:#1b5e20,color:#ffffff
+    style phase3 fill:#e65100,stroke:#bf360c,color:#ffffff
+    style phase4 fill:#c2185b,stroke:#880e4f,color:#ffffff
+```
+
+</div>
+
+### Key Principles
+
+1. **Minimal Intrusion**: Instrumentation should not alter existing control flow
+2. **Zero-Cost When Disabled**: Use compile-time flags and no-op implementations
+3. **Backward Compatibility**: Protocol Buffer extensions use high field numbers
+4. **Graceful Degradation**: Tracing failures must not affect node operation
+
+---
+
+## 3.3 Performance Overhead Summary
+
+> **OTLP** = OpenTelemetry Protocol
+
+| Metric        | Overhead   | Notes                                            |
+| ------------- | ---------- | ------------------------------------------------ |
+| CPU           | 1-3%       | Of per-transaction CPU cost (~200μs baseline)    |
+| Memory        | ~10 MB     | SDK statics + batch buffer + worker thread stack |
+| Network       | 10-50 KB/s | Compressed OTLP export to collector              |
+| Latency (p99) | <2%        | With proper sampling configuration               |
+
+---
+
+## 3.4 Detailed CPU Overhead Analysis
+
+### 3.4.1 Per-Operation Costs
+
+> **Note on hardware assumptions**: The costs below are based on the official OTel C++ SDK CI benchmarks
+> (969 runs on GitHub Actions 2-core shared runners). On production server hardware (3+ GHz Xeon),
+> expect costs at the **lower end** of each range (~30-50% improvement over CI hardware).
+
+| Operation             | Time (ns) | Frequency              | Impact     |
+| --------------------- | --------- | ---------------------- | ---------- |
+| Span creation         | 500-1000  | Every traced operation | Low        |
+| Span end              | 100-200   | Every traced operation | Low        |
+| SetAttribute (string) | 80-120    | 3-5 per span           | Low        |
+| SetAttribute (int)    | 40-60     | 2-3 per span           | Negligible |
+| AddEvent              | 100-200   | 0-2 per span           | Low        |
+| Context injection     | 150-250   | Per outgoing message   | Low        |
+| Context extraction    | 100-180   | Per incoming message   | Low        |
+| GetCurrent context    | 10-20     | Thread-local access    | Negligible |
+
+**Source**: Span creation based on OTel C++ SDK `BM_SpanCreation` benchmark (AlwaysOnSampler +
+SimpleSpanProcessor + InMemoryExporter), median ~1,000 ns on CI hardware. AddEvent includes
+timestamp read + string copy + vector push + mutex acquisition. Context injection/extraction
+confirmed by `BM_SpanCreationWithScope` benchmark delta (~160 ns).
+
+### 3.4.2 Transaction Processing Overhead
+
+<div align="center">
+
+```mermaid
+%%{init: {'pie': {'textPosition': 0.75}}}%%
+pie showData
+    "tx.receive (1400ns)" : 1400
+    "tx.validate (1200ns)" : 1200
+    "tx.relay (1200ns)" : 1200
+    "Context inject (200ns)" : 200
+```
+
+**Transaction Tracing Overhead (~4.0μs total)**
+
+</div>
+
+**Overhead percentage**: 4.0 μs / 200 μs (avg tx processing) = **~2.0%**
+
+> **Breakdown**: Each span (tx.receive, tx.validate, tx.relay) costs ~1,000 ns for creation plus
+> ~200-400 ns for 3-5 attribute sets. Context injection is ~200 ns (confirmed by benchmarks).
+> On production hardware, expect ~2.6 μs total (~1.3% overhead) due to faster span creation (~500-600 ns).
+
+### 3.4.3 Consensus Round Overhead
+
+| Operation              | Count | Cost (ns) | Total      |
+| ---------------------- | ----- | --------- | ---------- |
+| consensus.round span   | 1     | ~1200     | ~1.2 μs    |
+| consensus.phase spans  | 3     | ~1100     | ~3.3 μs    |
+| proposal.receive spans | ~20   | ~1100     | ~22 μs     |
+| proposal.send spans    | ~3    | ~1100     | ~3.3 μs    |
+| Context operations     | ~30   | ~200      | ~6 μs      |
+| **TOTAL**              |       |           | **~36 μs** |
+
+> **Why ~1,100-1,200 ns per span**: Each span costs ~1,000 ns creation + ~100-200 ns for 1-2 attributes, totaling ~1,100-1,200 ns.
+> Context operations remain ~200 ns (confirmed by benchmarks). On production hardware, expect ~24 μs total.
+
+**Overhead percentage**: 36 μs / 3s (typical round) = **~0.001%** (negligible)
+
+### 3.4.4 RPC Request Overhead
+
+| Operation        | Cost (ns)    |
+| ---------------- | ------------ |
+| rpc.request span | ~1200        |
+| rpc.command span | ~1100        |
+| Context extract  | ~250         |
+| Context inject   | ~200         |
+| **TOTAL**        | **~2.75 μs** |
+
+> **Why ~1,100-1,200 ns per span**: Each span costs ~1,000 ns creation + ~100-200 ns for attributes (command name,
+> version, role). Context extract/inject costs are confirmed by OTel C++ benchmarks.
+
+- Fast RPC (1ms): 2.75 μs / 1ms = **~0.275%**
+- Slow RPC (100ms): 2.75 μs / 100ms = **~0.003%**
+
+---
+
+## 3.5 Memory Overhead Analysis
+
+> **OTLP** = OpenTelemetry Protocol
+
+### 3.5.1 Static Memory
+
+| Component                            | Size        | Allocated  |
+| ------------------------------------ | ----------- | ---------- |
+| TracerProvider singleton             | ~64 KB      | At startup |
+| BatchSpanProcessor (circular buffer) | ~16 KB      | At startup |
+| BatchSpanProcessor (worker thread)   | ~8 MB       | At startup |
+| OTLP exporter (gRPC channel init)    | ~256 KB     | At startup |
+| Propagator registry                  | ~8 KB       | At startup |
+| **Total static**                     | **~8.3 MB** |            |
+
+> **Why higher than earlier estimate**: The BatchSpanProcessor's circular buffer itself is only ~16 KB
+> (2049 x 8-byte `AtomicUniquePtr` entries), but it spawns a dedicated worker thread whose default
+> stack size on Linux is ~8 MB. The OTLP gRPC exporter allocates memory for channel stubs and TLS
+> initialization. The worker thread stack dominates the static footprint.
+
+### 3.5.2 Dynamic Memory
+
+| Component            | Size per unit  | Max units  | Peak            |
+| -------------------- | -------------- | ---------- | --------------- |
+| Active span          | ~500-800 bytes | 1000       | ~500-800 KB     |
+| Queued span (export) | ~500 bytes     | 2048       | ~1 MB           |
+| Attribute storage    | ~80 bytes      | 5 per span | Included        |
+| Context storage      | ~64 bytes      | Per thread | ~6.4 KB         |
+| **Total dynamic**    |                |            | **~1.5-1.8 MB** |
+
+> **Why active spans are larger**: An active `Span` object includes the wrapper (~88 bytes: shared_ptr,
+> mutex, unique_ptr to Recordable) plus `SpanData` (~250 bytes: SpanContext, timestamps, name, status,
+> empty containers) plus attribute storage (~200-500 bytes for 3-5 string attributes in a `std::map`).
+> Source: `sdk/src/trace/span.h` and `sdk/include/opentelemetry/sdk/trace/span_data.h`.
+> Queued spans release the wrapper, keeping only `SpanData` + attributes (~500 bytes).
+
+### 3.5.3 Memory Growth Characteristics
+
+```mermaid
+---
+config:
+    xyChart:
+        width: 700
+        height: 400
+---
+xychart-beta
+    title "Memory Usage vs Span Rate (bounded by queue limit)"
+    x-axis "Spans/second" [0, 200, 400, 600, 800, 1000]
+    y-axis "Memory (MB)" 0 --> 12
+    line [8.5, 9.2, 9.6, 9.9, 10.0, 10.0]
+```
+
+**Notes**:
+
+- Memory increases with span rate but **plateaus at queue capacity** (default 2048 spans)
+- Batch export prevents unbounded growth
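+- At queue limit, newly ended spans are dropped rather than blocking the caller (the OTel C++ `BatchSpanProcessor` discards the incoming span when its buffer is full; already-queued spans are kept)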
+- Maximum memory is bounded: ~8.3 MB static (dominated by worker thread stack) + 2048 queued spans x ~500 bytes (~1 MB) + active spans (~0.8 MB) ≈ **~10 MB ceiling**
+- The worker thread stack (~8 MB) is virtual memory; actual RSS depends on stack usage (typically much less)
+
+### 3.5.4 Performance Data Sources
+
+The overhead estimates in Sections 3.3-3.5 are derived from the following sources:
+
+| Source                                           | What it covers                                        | URL                                                                                                                                        |
+| ------------------------------------------------ | ----------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| OTel C++ SDK CI benchmarks (969 runs)            | Span creation, context activation, sampler overhead   | [Benchmark Dashboard](https://open-telemetry.github.io/opentelemetry-cpp/benchmarks/)                                                      |
+| `api/test/trace/span_benchmark.cc`               | API-level span creation (~22 ns no-op)                | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/api/test/trace/span_benchmark.cc)                                   |
+| `sdk/test/trace/sampler_benchmark.cc`            | SDK span creation with samplers (~1,000 ns AlwaysOn)  | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/test/trace/sampler_benchmark.cc)                                |
+| `sdk/include/.../span_data.h`                    | SpanData memory layout (~250 bytes base)              | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/include/opentelemetry/sdk/trace/span_data.h)                    |
+| `sdk/src/trace/span.h`                           | Span wrapper memory layout (~88 bytes)                | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/trace/span.h)                                               |
+| `sdk/include/.../batch_span_processor_options.h` | Default queue size (2048), batch size (512)           | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/include/opentelemetry/sdk/trace/batch_span_processor_options.h) |
+| `sdk/include/.../circular_buffer.h`              | CircularBuffer implementation (AtomicUniquePtr array) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/include/opentelemetry/sdk/common/circular_buffer.h)             |
+| OTLP proto definition                            | Serialized span size estimation                       | [Proto](https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto)                          |
+
+---
+
+## 3.6 Network Overhead Analysis
+
+### 3.6.1 Export Bandwidth
+
+> **Bytes per span**: Estimates use ~500 bytes/span (conservative upper bound). OTLP protobuf analysis
+> shows a typical span with 3-5 string attributes serializes to ~200-300 bytes raw; with gzip
+> compression (~60-70% of raw) and batching (amortized headers), ~350 bytes/span is more realistic.
+> The table uses the conservative estimate for capacity planning.
+
+| Sampling Rate | Spans/sec | Bandwidth | Notes            |
+| ------------- | --------- | --------- | ---------------- |
+| 100%          | ~500      | ~250 KB/s | Development only |
+| 10%           | ~50       | ~25 KB/s  | Staging          |
+| 1%            | ~5        | ~2.5 KB/s | Production       |
+| Error-only    | ~1        | ~0.5 KB/s | Minimal overhead |
+
+### 3.6.2 Trace Context Propagation
+
+| Message Type           | Context Size | Messages/sec | Overhead    |
+| ---------------------- | ------------ | ------------ | ----------- |
+| TMTransaction          | 25 bytes     | ~100         | ~2.5 KB/s   |
+| TMProposeSet           | 25 bytes     | ~10          | ~250 B/s    |
+| TMValidation           | 25 bytes     | ~50          | ~1.25 KB/s  |
+| **Total P2P overhead** |              |              | **~4 KB/s** |
+
+---
+
+## 3.7 Optimization Strategies
+
+### 3.7.1 Sampling Strategies
+
+#### Tail Sampling
+
+```mermaid
+flowchart TD
+    trace["New Trace"]
+
+    trace --> errors{"Is Error?"}
+    errors -->|Yes| sample["SAMPLE"]
+    errors -->|No| consensus{"Is Consensus?"}
+
+    consensus -->|Yes| sample
+    consensus -->|No| slow{"Is Slow?"}
+
+    slow -->|Yes| sample
+    slow -->|No| prob{"Random < 10%?"}
+
+    prob -->|Yes| sample
+    prob -->|No| drop["DROP"]
+
+    style sample fill:#4caf50,stroke:#388e3c,color:#fff
+    style drop fill:#f44336,stroke:#c62828,color:#fff
+```
+
+### 3.7.2 Batch Tuning Recommendations
+
+| Environment        | Batch Size | Batch Delay | Max Queue |
+| ------------------ | ---------- | ----------- | --------- |
+| Low-latency        | 128        | 1000ms      | 512       |
+| High-throughput    | 1024       | 10000ms     | 8192      |
+| Memory-constrained | 256        | 2000ms      | 512       |
+
+### 3.7.3 Conditional Instrumentation
+
+```cpp
+// Compile-time feature flag
+#ifndef XRPL_ENABLE_TELEMETRY
+// Zero-cost when disabled
+#define XRPL_TRACE_SPAN(t, n) ((void)0)
+#endif
+
+// Runtime component filtering
+if (telemetry.shouldTracePeer())
+{
+    XRPL_TRACE_SPAN(telemetry, "peer.message.receive");
+    // ... instrumentation
+}
+// No overhead when component tracing disabled
+```
+
+---
+
+## 3.8 Links to Detailed Documentation
+
+- **[Code Samples](./04-code-samples.md)**: Complete implementation code for all components
+- **[Configuration Reference](./05-configuration-reference.md)**: Configuration options and collector setup
+- **[Implementation Phases](./06-implementation-phases.md)**: Detailed timeline and milestones
+
+---
+
+## 3.9 Code Intrusiveness Assessment
+
+> **TxQ** = Transaction Queue
+
+This section provides a detailed assessment of how intrusive the OpenTelemetry integration is to the existing rippled codebase.
+
+### 3.9.1 Files Modified Summary
+
+| Component             | Files Modified | Lines Added | Lines Changed | Architectural Impact |
+| --------------------- | -------------- | ----------- | ------------- | -------------------- |
+| **Core Telemetry**    | 5 new files    | ~800        | 0             | None (new module)    |
+| **Application Init**  | 2 files        | ~30         | ~5            | Minimal              |
+| **RPC Layer**         | 3 files        | ~80         | ~20           | Minimal              |
+| **Transaction Relay** | 4 files        | ~120        | ~40           | Low                  |
+| **Consensus**         | 3 files        | ~100        | ~30           | Low-Medium           |
+| **Protocol Buffers**  | 1 file         | ~25         | 0             | Low                  |
+| **CMake/Build**       | 3 files        | ~50         | ~10           | Minimal              |
+| **PathFinding**       | 2 files        | ~80         | ~5            | Minimal              |
+| **TxQ/Fee**           | 2 files        | ~60         | ~5            | Minimal              |
+| **Validator/Amend**   | 3 files        | ~40         | ~5            | Minimal              |
+| **Total**             | **~28 files**  | **~1,385**  | **~120**      | **Low**              |
+
+### 3.9.2 Detailed File Impact
+
+```mermaid
+pie title Code Changes by Component
+    "New Telemetry Module" : 800
+    "Transaction Relay" : 160
+    "Consensus" : 130
+    "RPC Layer" : 100
+    "PathFinding" : 80
+    "TxQ/Fee" : 60
+    "Validator/Amendment" : 40
+    "Application Init" : 35
+    "Protocol Buffers" : 25
+    "Build System" : 60
+```
+
+#### New Files (No Impact on Existing Code)
+
+| File                                           | Lines | Purpose              |
+| ---------------------------------------------- | ----- | -------------------- |
+| `include/xrpl/telemetry/Telemetry.h`           | ~160  | Main interface       |
+| `include/xrpl/telemetry/SpanGuard.h`           | ~120  | RAII wrapper         |
+| `include/xrpl/telemetry/TraceContext.h`        | ~80   | Context propagation  |
+| `src/xrpld/telemetry/TracingInstrumentation.h` | ~60   | Macros               |
+| `src/libxrpl/telemetry/Telemetry.cpp`          | ~200  | Implementation       |
+| `src/libxrpl/telemetry/TelemetryConfig.cpp`    | ~60   | Config parsing       |
+| `src/libxrpl/telemetry/NullTelemetry.cpp`      | ~40   | No-op implementation |
+
+#### Modified Files (Existing Rippled Code)
+
+| File                                              | Lines Added | Lines Changed | Risk Level |
+| ------------------------------------------------- | ----------- | ------------- | ---------- |
+| `src/xrpld/app/main/Application.cpp`              | ~15         | ~3            | Low        |
+| `include/xrpl/app/main/Application.h`             | ~5          | ~2            | Low        |
+| `src/xrpld/rpc/detail/ServerHandler.cpp`          | ~40         | ~10           | Low        |
+| `src/xrpld/rpc/handlers/*.cpp`                    | ~30         | ~8            | Low        |
+| `src/xrpld/overlay/detail/PeerImp.cpp`            | ~60         | ~15           | Medium     |
+| `src/xrpld/overlay/detail/OverlayImpl.cpp`        | ~30         | ~10           | Medium     |
+| `src/xrpld/app/consensus/RCLConsensus.cpp`        | ~50         | ~15           | Medium     |
+| `src/xrpld/app/consensus/RCLConsensusAdaptor.cpp` | ~40         | ~12           | Medium     |
+| `src/xrpld/core/JobQueue.cpp`                     | ~20         | ~5            | Low        |
+| `src/xrpld/app/paths/PathRequest.cpp`             | ~40         | ~3            | Low        |
+| `src/xrpld/app/paths/Pathfinder.cpp`              | ~40         | ~2            | Low        |
+| `src/xrpld/app/misc/TxQ.cpp`                      | ~40         | ~3            | Low        |
+| `src/xrpld/app/main/LoadManager.cpp`              | ~20         | ~2            | Low        |
+| `src/xrpld/app/misc/ValidatorList.cpp`            | ~20         | ~2            | Low        |
+| `src/xrpld/app/misc/AmendmentTable.cpp`           | ~10         | ~2            | Low        |
+| `src/xrpld/app/misc/Manifest.cpp`                 | ~10         | ~1            | Low        |
+| `src/libxrpl/shamap/SHAMap.cpp`                   | ~20         | ~3            | Low        |
+| `src/xrpld/overlay/detail/ripple.proto`           | ~25         | 0             | Low        |
+| `CMakeLists.txt`                                  | ~40         | ~8            | Low        |
+| `cmake/FindOpenTelemetry.cmake`                   | ~50         | 0             | None (new) |
+
+### 3.9.3 Risk Assessment by Component
+
+<div align="center">
+
+**Do First** ↖ ↗ **Plan Carefully**
+
+```mermaid
+quadrantChart
+    title Code Intrusiveness Risk Matrix
+    x-axis Low Risk --> High Risk
+    y-axis Low Value --> High Value
+
+    RPC Tracing: [0.2, 0.55]
+    Transaction Relay: [0.55, 0.85]
+    Consensus Tracing: [0.75, 0.92]
+    Peer Message Tracing: [0.85, 0.35]
+    JobQueue Context: [0.3, 0.42]
+    Ledger Acquisition: [0.48, 0.65]
+    PathFinding: [0.38, 0.72]
+    TxQ and Fees: [0.25, 0.62]
+    Validator Mgmt: [0.15, 0.35]
+```
+
+**Optional** ↙ ↘ **Avoid**
+
+</div>
+
+#### Risk Level Definitions
+
+| Risk Level | Definition                                                       | Mitigation                         |
+| ---------- | ---------------------------------------------------------------- | ---------------------------------- |
+| **Low**    | Additive changes only; no modification to existing logic         | Standard code review               |
+| **Medium** | Minor modifications to existing functions; clear boundaries      | Comprehensive unit tests           |
+| **High**   | Changes to core logic or data structures; potential side effects | Integration tests + staged rollout |
+
+### 3.9.4 Architectural Impact Assessment
+
+| Aspect               | Impact  | Justification                                                                    |
+| -------------------- | ------- | -------------------------------------------------------------------------------- |
+| **Data Flow**        | Minimal | Read-only instrumentation; no modification to consensus or transaction data flow |
+| **Threading Model**  | Minimal | Context propagation uses thread-local storage (standard OTel pattern)            |
+| **Memory Model**     | Low     | Bounded queues prevent unbounded growth; RAII ensures cleanup                    |
+| **Network Protocol** | Low     | Optional fields in protobuf (high field numbers); backward compatible            |
+| **Configuration**    | None    | New config section; existing configs unaffected                                  |
+| **Build System**     | Low     | Optional CMake flag; builds work without OpenTelemetry                           |
+| **Dependencies**     | Low     | OpenTelemetry SDK is optional; null implementation when disabled                 |
+
+### 3.9.5 Backward Compatibility
+
+| Compatibility   | Status  | Notes                                                 |
+| --------------- | ------- | ----------------------------------------------------- |
+| **Config File** | ✅ Full | New `[telemetry]` section is optional                 |
+| **Protocol**    | ✅ Full | Optional protobuf fields with high field numbers      |
+| **Build**       | ✅ Full | `XRPL_ENABLE_TELEMETRY=OFF` produces identical binary |
+| **Runtime**     | ✅ Full | `enabled=0` produces zero overhead                    |
+| **API**         | ✅ Full | No changes to public RPC or P2P APIs                  |
+
+### 3.9.6 Rollback Strategy
+
+If issues are discovered after deployment:
+
+1. **Immediate**: Set `enabled=0` in config and restart (zero code change)
+2. **Quick**: Rebuild with `XRPL_ENABLE_TELEMETRY=OFF`
+3. **Complete**: Revert telemetry commits (clean separation makes this easy)
+
+### 3.9.7 Code Change Examples
+
+**Minimal RPC Instrumentation (Low Intrusiveness):**
+
+```cpp
+// Before
+void ServerHandler::onRequest(...) {
+    auto result = processRequest(req);
+    send(result);
+}
+
+// After (only 3 lines added)
+void ServerHandler::onRequest(...) {
+    XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");  // +1 line
+    XRPL_TRACE_SET_ATTR("xrpl.rpc.command", command);     // +1 line
+
+    auto result = processRequest(req);
+
+    XRPL_TRACE_SET_ATTR("xrpl.rpc.status", status);       // +1 line
+    send(result);
+}
+```
+
+**Consensus Instrumentation (Medium Intrusiveness):**
+
+```cpp
+// Before
+void RCLConsensusAdaptor::startRound(...) {
+    // ... existing logic
+}
+
+// After (context storage required)
+void RCLConsensusAdaptor::startRound(...) {
+    XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.round");
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", seq);
+
+    // Store context for child spans in phase transitions
+    currentRoundContext_ = _xrpl_guard_->context();  // New member variable
+
+    // ... existing logic unchanged
+}
+```
+
+---
+
+_Previous: [Design Decisions](./02-design-decisions.md)_ | _Next: [Code Samples](./04-code-samples.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/04-code-samples.md
+++ b/OpenTelemetryPlan/04-code-samples.md
--- a/OpenTelemetryPlan/05-configuration-reference.md
+++ b/OpenTelemetryPlan/05-configuration-reference.md
--- a/OpenTelemetryPlan/06-implementation-phases.md
+++ b/OpenTelemetryPlan/06-implementation-phases.md
@@ -0,0 +1,649 @@
+# Implementation Phases
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Related**: [Configuration Reference](./05-configuration-reference.md) | [Observability Backends](./07-observability-backends.md)
+
+---
+
+## 6.1 Phase Overview
+
+> **TxQ** = Transaction Queue
+
+```mermaid
+gantt
+    title OpenTelemetry Implementation Timeline
+    dateFormat  YYYY-MM-DD
+    axisFormat  Week %W
+
+    section Phase 1
+    Core Infrastructure        :p1, 2024-01-01, 2w
+    SDK Integration           :p1a, 2024-01-01, 4d
+    Telemetry Interface       :p1b, after p1a, 3d
+    Configuration & CMake     :p1c, after p1b, 3d
+    Unit Tests                :p1d, after p1c, 2d
+    Buffer & Integration      :p1e, after p1d, 2d
+
+    section Phase 2
+    RPC Tracing               :p2, after p1, 2w
+    HTTP Context Extraction   :p2a, after p1, 2d
+    RPC Handler Instrumentation :p2b, after p2a, 4d
+    PathFinding Instrumentation :p2f, after p2b, 2d
+    TxQ Instrumentation       :p2g, after p2f, 2d
+    WebSocket Support         :p2c, after p2g, 2d
+    Integration Tests         :p2d, after p2c, 2d
+    Buffer & Review           :p2e, after p2d, 4d
+
+    section Phase 3
+    Transaction Tracing       :p3, after p2, 2w
+    Protocol Buffer Extension :p3a, after p2, 2d
+    PeerImp Instrumentation   :p3b, after p3a, 3d
+    Fee Escalation Instrumentation :p3f, after p3b, 2d
+    Relay Context Propagation :p3c, after p3f, 3d
+    Multi-node Tests          :p3d, after p3c, 2d
+    Buffer & Review           :p3e, after p3d, 4d
+
+    section Phase 4
+    Consensus Tracing         :p4, after p3, 2w
+    Consensus Round Spans     :p4a, after p3, 3d
+    Proposal Handling         :p4b, after p4a, 3d
+    Validator List & Manifest Tracing :p4f, after p4b, 2d
+    Amendment Voting Tracing  :p4g, after p4f, 2d
+    SHAMap Sync Tracing       :p4h, after p4g, 2d
+    Validation Tests          :p4c, after p4h, 4d
+    Buffer & Review           :p4e, after p4c, 4d
+
+    section Phase 5
+    Documentation & Deploy    :p5, after p4, 1w
+```
+
+---
+
+## 6.2 Phase 1: Core Infrastructure (Weeks 1-2)
+
+**Objective**: Establish foundational telemetry infrastructure
+
+### Tasks
+
+| Task | Description                                           |
+| ---- | ----------------------------------------------------- |
+| 1.1  | Add OpenTelemetry C++ SDK to Conan/CMake              |
+| 1.2  | Implement `Telemetry` interface and factory           |
+| 1.3  | Implement `SpanGuard` RAII wrapper                    |
+| 1.4  | Implement configuration parser                        |
+| 1.5  | Integrate into `ApplicationImp`                       |
+| 1.6  | Add conditional compilation (`XRPL_ENABLE_TELEMETRY`) |
+| 1.7  | Create `NullTelemetry` no-op implementation           |
+| 1.8  | Unit tests for core infrastructure                    |
+
+### Exit Criteria
+
+- [ ] OpenTelemetry SDK compiles and links
+- [ ] Telemetry can be enabled/disabled via config
+- [ ] Basic span creation works
+- [ ] No performance regression when disabled
+- [ ] Unit tests passing
+
+---
+
+## 6.3 Phase 2: RPC Tracing (Weeks 3-4)
+
+> **TxQ** = Transaction Queue
+
+**Objective**: Complete tracing for all RPC operations
+
+### Tasks
+
+| Task | Description                                                                |
+| ---- | -------------------------------------------------------------------------- |
+| 2.1  | Implement W3C Trace Context HTTP header extraction                         |
+| 2.2  | Instrument `ServerHandler::onRequest()`                                    |
+| 2.3  | Instrument `RPCHandler::doCommand()`                                       |
+| 2.4  | Add RPC-specific attributes                                                |
+| 2.5  | Instrument WebSocket handler                                               |
+| 2.6  | PathFinding instrumentation (`pathfind.request`, `pathfind.compute` spans) |
+| 2.7  | TxQ instrumentation (`txq.enqueue`, `txq.apply` spans)                     |
+| 2.8  | Integration tests for RPC tracing                                          |
+| 2.9  | Performance benchmarks                                                     |
+| 2.10 | Documentation                                                              |
+
+### Exit Criteria
+
+- [ ] All RPC commands traced
+- [ ] Trace context propagates from HTTP headers
+- [ ] WebSocket and HTTP both instrumented
+- [ ] <1ms overhead per RPC call
+- [ ] Integration tests passing
+
+---
+
+## 6.4 Phase 3: Transaction Tracing (Weeks 5-6)
+
+**Objective**: Trace transaction lifecycle across network
+
+### Tasks
+
+| Task | Description                                          |
+| ---- | ---------------------------------------------------- |
+| 3.1  | Define `TraceContext` Protocol Buffer message        |
+| 3.2  | Implement protobuf context serialization             |
+| 3.3  | Instrument `PeerImp::handleTransaction()`            |
+| 3.4  | Instrument `NetworkOPs::submitTransaction()`         |
+| 3.5  | Instrument HashRouter integration                    |
+| 3.6  | Fee escalation instrumentation (`fee.escalate` span) |
+| 3.7  | Implement relay context propagation                  |
+| 3.8  | Integration tests (multi-node)                       |
+| 3.9  | Performance benchmarks                               |
+
+### Exit Criteria
+
+- [ ] Transaction traces span across nodes
+- [ ] Trace context in Protocol Buffer messages
+- [ ] HashRouter deduplication visible in traces
+- [ ] Multi-node integration tests passing
+- [ ] <5% overhead on transaction throughput
+
+---
+
+## 6.5 Phase 4: Consensus Tracing (Weeks 7-8)
+
+**Objective**: Full observability into consensus rounds
+
+### Tasks
+
+| Task | Description                                    |
+| ---- | ---------------------------------------------- |
+| 4.1  | Instrument `RCLConsensusAdaptor::startRound()` |
+| 4.2  | Instrument phase transitions                   |
+| 4.3  | Instrument proposal handling                   |
+| 4.4  | Instrument validation handling                 |
+| 4.5  | Add consensus-specific attributes              |
+| 4.6  | Correlate with transaction traces              |
+| 4.7  | Validator list and manifest tracing            |
+| 4.8  | Amendment voting tracing                       |
+| 4.9  | SHAMap sync tracing                            |
+| 4.10 | Multi-validator integration tests              |
+| 4.11 | Performance validation                         |
+
+### Spans Produced
+
+| Span Name                   | Location               | Attributes                                                                                                                                                                                                            |
+| --------------------------- | ---------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `consensus.proposal.send`   | `RCLConsensus.cpp:177` | `xrpl.consensus.round`                                                                                                                                                                                                |
+| `consensus.ledger_close`    | `RCLConsensus.cpp:282` | `xrpl.consensus.ledger.seq`, `xrpl.consensus.mode`                                                                                                                                                                    |
+| `consensus.accept`          | `RCLConsensus.cpp:395` | `xrpl.consensus.proposers`, `xrpl.consensus.round_time_ms`                                                                                                                                                            |
+| `consensus.accept.apply`    | `RCLConsensus.cpp:521` | `xrpl.consensus.close_time`, `close_time_correct`, `close_resolution_ms`, `state`, `proposing`, `round_time_ms`, `ledger.seq`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction` |
+| `consensus.validation.send` | `RCLConsensus.cpp:753` | `xrpl.consensus.proposing`                                                                                                                                                                                            |
+
+### Exit Criteria
+
+- [x] Complete consensus round traces
+- [x] Phase transitions visible
+- [x] Proposals and validations traced
+- [x] Close time agreement tracked (per `avCT_CONSENSUS_PCT`)
+- [x] No impact on consensus timing
+- [ ] Multi-validator test network validated
+
+### Implementation Status — Phase 4a Complete
+
+Phase 4a (establish-phase gap fill & cross-node correlation) adds:
+
+- **Deterministic trace ID** derived from `previousLedger.id()` so all validators
+  in the same round share the same `trace_id` (switchable via
+  `consensus_trace_strategy` config: `"deterministic"` or `"attribute"`).
+  See [Configuration Reference](./05-configuration-reference.md) for full
+  configuration options. The `consensus_trace_strategy` option will be
+  documented in the configuration reference as part of Phase 4a implementation.
+- **Round lifecycle spans**: `consensus.round` with round-to-round span links.
+- **Establish phase**: `consensus.establish`, `consensus.update_positions` (with
+  `dispute.resolve` events), `consensus.check` (with threshold tracking).
+- **Mode changes**: `consensus.mode_change` spans.
+- **Validation**: `consensus.validation.send` with span link to round span
+  (thread-safe cross-thread access via `roundSpanContext_` snapshot).
+- **Separation of concerns**: telemetry extracted to private helpers
+  (`startRoundTracing`, `createValidationSpan`, `startEstablishTracing`,
+  `updateEstablishTracing`, `endEstablishTracing`).
+
+See [Phase4_taskList.md](./Phase4_taskList.md) for the full spec and implementation notes.
+
+---
+
+## 6.5a Phase 4a: Establish-Phase Gap Fill & Cross-Node Correlation
+
+**Objective**: Fill tracing gaps in the establish phase and enable cross-node
+correlation using deterministic trace IDs derived from `previousLedger.id()`.
+
+**Approach**: Direct instrumentation in `Consensus.h`. Long-lived spans use
+direct SpanGuard members; short-lived scoped spans use `XRPL_TRACE_*` macros.
+
+### Tasks
+
+| Task | Description                                      | Effort | Risk   |
+| ---- | ------------------------------------------------ | ------ | ------ |
+| 4a.0 | Prerequisites: extend SpanGuard & Telemetry APIs | 1d     | Medium |
+| 4a.1 | Adaptor `getTelemetry()` method                  | 0.5d   | Low    |
+| 4a.2 | Switchable round span with deterministic traceID | 2d     | High   |
+| 4a.3 | Span members in `Consensus.h`                    | 0.5d   | Medium |
+| 4a.4 | Instrument `phaseEstablish()`                    | 1d     | Medium |
+| 4a.5 | Instrument `updateOurPositions()`                | 1d     | Medium |
+| 4a.6 | Instrument `haveConsensus()` (thresholds)        | 1d     | Medium |
+| 4a.7 | Instrument mode changes                          | 0.5d   | Low    |
+| 4a.8 | Reparent existing spans under round              | 0.5d   | Low    |
+| 4a.9 | Build verification and testing                   | 1d     | Low    |
+
+**Total Effort**: 9 days
+
+### Spans Produced
+
+| Span Name                    | Location           | Key Attributes                                                   |
+| ---------------------------- | ------------------ | ---------------------------------------------------------------- |
+| `consensus.round`            | `RCLConsensus.cpp` | `round_id`, `ledger_id`, `ledger.seq`, `mode`; link → prev round |
+| `consensus.establish`        | `Consensus.h`      | `converge_percent`, `establish_count`, `proposers`               |
+| `consensus.update_positions` | `Consensus.h`      | `disputes_count`, `converge_percent`, `proposers_agreed/total`   |
+| `consensus.check`            | `Consensus.h`      | `agree/disagree_count`, `threshold_percent`, `result`            |
+| `consensus.mode_change`      | `RCLConsensus.cpp` | `mode.old`, `mode.new`                                           |
+
+### Exit Criteria
+
+- [ ] Establish phase internals fully traced (disputes, convergence, thresholds)
+- [ ] Cross-node correlation works via deterministic trace_id
+- [ ] Strategy switchable via config (`deterministic` / `attribute`)
+- [ ] Consecutive rounds linked via span links (follows-from relationship)
+- [ ] Build passes with telemetry ON and OFF
+- [ ] No impact on consensus timing
+
+See [Phase4_taskList.md](./Phase4_taskList.md) for full task details.
+
+---
+
+## 6.5b Phase 4b: Cross-Node Propagation (Future)
+
+**Objective**: Wire `TraceContextPropagator` for P2P messages (proposals,
+validations) to enable true distributed tracing between nodes.
+
+**Status**: Design documented, NOT implemented. Protobuf fields (field 1001)
+and `TraceContextPropagator` class exist. Wiring deferred until Phase 4a is
+validated in a multi-node environment.
+
+**Prerequisites**: Phase 4a complete and validated.
+
+See [Phase4_taskList.md § Phase 4b](./Phase4_taskList.md) for full design.
+
+---
+
+## 6.6 Phase 5: Documentation & Deployment (Week 9)
+
+**Objective**: Production readiness
+
+### Tasks
+
+| Task | Description                   |
+| ---- | ----------------------------- |
+| 5.1  | Operator runbook              |
+| 5.2  | Grafana dashboards            |
+| 5.3  | Alert definitions             |
+| 5.4  | Collector deployment examples |
+| 5.5  | Developer documentation       |
+| 5.6  | Training materials            |
+| 5.7  | Final integration testing     |
+
+---
+
+## 6.7 Risk Assessment
+
+```mermaid
+quadrantChart
+    title Risk Assessment Matrix
+    x-axis Low Impact --> High Impact
+    y-axis Low Likelihood --> High Likelihood
+    quadrant-1 Mitigate Immediately
+    quadrant-2 Plan Mitigation
+    quadrant-3 Accept Risk
+    quadrant-4 Monitor Closely
+
+    SDK Compat: [0.2, 0.18]
+    Protocol Chg: [0.75, 0.72]
+    Perf Overhead: [0.58, 0.42]
+    Context Prop: [0.4, 0.55]
+    Memory Leaks: [0.85, 0.25]
+```
+
+### Risk Details
+
+| Risk                                 | Likelihood | Impact | Mitigation                              |
+| ------------------------------------ | ---------- | ------ | --------------------------------------- |
+| Protocol changes break compatibility | Medium     | High   | Use high field numbers, optional fields |
+| Performance overhead unacceptable    | Medium     | Medium | Sampling, conditional compilation       |
+| Context propagation complexity       | Medium     | Medium | Phased rollout, extensive testing       |
+| SDK compatibility issues             | Low        | Medium | Pin SDK version, fallback to no-op      |
+| Memory leaks in long-running nodes   | Low        | High   | Memory profiling, bounded queues        |
+
+---
+
+## 6.8 Success Metrics
+
+| Metric                   | Target                                                         | Measurement           |
+| ------------------------ | -------------------------------------------------------------- | --------------------- |
+| Trace coverage           | >95% of transaction code paths (independent of sampling ratio) | Sampling verification |
+| CPU overhead             | <3%                                                            | Benchmark tests       |
+| Memory overhead          | <10 MB                                                         | Memory profiling      |
+| Latency impact (p99)     | <2%                                                            | Performance tests     |
+| Trace completeness       | >99% spans with required attrs                                 | Validation script     |
+| Cross-node trace linkage | >90% of multi-hop transactions                                 | Integration tests     |
+
+---
+
+## 6.9 Quick Wins and Crawl-Walk-Run Strategy
+
+> **TxQ** = Transaction Queue | **ROI** = Return on Investment
+
+This section outlines a prioritized approach to maximize ROI with minimal initial investment.
+
+### 6.9.1 Crawl-Walk-Run Overview
+
+<div align="center">
+
+```mermaid
+flowchart TB
+    subgraph crawl["🐢 CRAWL (Week 1-2)"]
+        direction LR
+        c1[Core SDK Setup] ~~~ c2[RPC Tracing Only] ~~~ c3[PathFinding + TxQ Tracing] ~~~ c4[Single Node]
+    end
+
+    subgraph walk["🚶 WALK (Week 3-5)"]
+        direction LR
+        w1[Transaction Tracing] ~~~ w2[Fee Escalation Tracing] ~~~ w3[Cross-Node Context] ~~~ w4[Basic Dashboards]
+    end
+
+    subgraph run["🏃 RUN (Week 6-9)"]
+        direction LR
+        r1[Consensus Tracing] ~~~ r2[Validator, Amendment,<br/>SHAMap Tracing] ~~~ r3[Full Correlation] ~~~ r4[Production Deploy]
+    end
+
+    crawl --> walk --> run
+
+    style crawl fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style walk fill:#bf360c,stroke:#8c2809,color:#fff
+    style run fill:#0d47a1,stroke:#082f6a,color:#fff
+    style c1 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style c2 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style c3 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style c4 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style w1 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style w2 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style w3 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style w4 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style r1 fill:#0d47a1,stroke:#082f6a,color:#fff
+    style r2 fill:#0d47a1,stroke:#082f6a,color:#fff
+    style r3 fill:#0d47a1,stroke:#082f6a,color:#fff
+    style r4 fill:#0d47a1,stroke:#082f6a,color:#fff
+```
+
+</div>
+
+**Reading the diagram:**
+
+- **CRAWL (Weeks 1-2)**: Minimal investment -- set up the SDK, instrument RPC and PathFinding/TxQ handlers, and verify on a single node. Delivers immediate latency visibility.
+- **WALK (Weeks 3-5)**: Expand to transaction lifecycle tracing, fee escalation, cross-node context propagation, and basic Grafana dashboards. This is where distributed tracing starts working.
+- **RUN (Weeks 6-9)**: Full consensus instrumentation, validator/amendment/SHAMap tracing, end-to-end correlation, and production deployment with sampling and alerting.
+- **Arrows (crawl → walk → run)**: Each phase builds on the prior one; you cannot skip ahead because later phases depend on infrastructure established earlier.
+
+### 6.9.2 Quick Wins (Immediate Value)
+
+| Quick Win                      | Value  | When to Deploy |
+| ------------------------------ | ------ | -------------- |
+| **RPC Command Tracing**        | High   | Week 2         |
+| **RPC Latency Histograms**     | High   | Week 2         |
+| **Error Rate Dashboard**       | Medium | Week 2         |
+| **Transaction Submit Tracing** | High   | Week 3         |
+| **Consensus Round Duration**   | Medium | Week 6         |
+
+### 6.9.3 CRAWL Phase (Weeks 1-2)
+
+**Goal**: Get basic tracing working with minimal code changes.
+
+**What You Get**:
+
+- RPC request/response traces for all commands
+- Latency breakdown per RPC command
+- PathFinding and TxQ tracing (directly impacts RPC latency)
+- Error visibility with stack traces
+- Basic Grafana dashboard
+
+**Code Changes**: ~15 lines in `ServerHandler.cpp`, ~40 lines in new telemetry module
+
+**Why Start Here**:
+
+- RPC is the lowest-risk, highest-visibility component
+- PathFinding and TxQ are RPC-adjacent and directly affect latency
+- Immediate value for debugging client issues
+- No cross-node complexity
+- Single file modification to existing code
+
+### 6.9.4 WALK Phase (Weeks 3-5)
+
+**Goal**: Add transaction lifecycle tracing across nodes.
+
+**What You Get**:
+
+- End-to-end transaction traces from submit to relay
+- Fee escalation tracing within the transaction pipeline
+- Cross-node correlation (see transaction path)
+- HashRouter deduplication visibility
+- Relay latency metrics
+
+**Code Changes**: ~120 lines across 4 files, plus protobuf extension
+
+**Why Do This Second**:
+
+- Builds on RPC tracing (transactions submitted via RPC)
+- Fee escalation is integral to the transaction processing pipeline
+- Moderate complexity (requires context propagation)
+- High value for debugging transaction issues
+
+### 6.9.5 RUN Phase (Weeks 6-9)
+
+**Goal**: Full observability including consensus.
+
+**What You Get**:
+
+- Complete consensus round visibility
+- Phase transition timing
+- Validator proposal tracking
+- Validator list and manifest tracing
+- Amendment voting tracing
+- SHAMap sync tracing
+- Full end-to-end traces (client → RPC → TX → consensus → ledger)
+
+**Code Changes**: ~100 lines across 3 consensus files, plus validator/amendment/SHAMap modules
+
+**Why Do This Last**:
+
+- Highest complexity (consensus is critical path)
+- Validator, amendment, and SHAMap components are lower priority
+- Requires thorough testing
+- Lower relative value (consensus issues are rarer)
+
+### 6.9.6 ROI Prioritization Matrix
+
+```mermaid
+quadrantChart
+    title Implementation ROI Matrix
+    x-axis Low Effort --> High Effort
+    y-axis Low Value --> High Value
+    quadrant-1 Major Projects - Plan Carefully
+    quadrant-2 Quick Wins - Do First
+    quadrant-3 Nice to Have - Optional
+    quadrant-4 Time Sinks - Avoid
+
+    RPC Tracing: [0.15, 0.92]
+    TX Submit Trace: [0.3, 0.78]
+    TX Relay Trace: [0.5, 0.88]
+    Consensus Trace: [0.72, 0.72]
+    Peer Msg Trace: [0.85, 0.3]
+    Ledger Acquire: [0.55, 0.52]
+```
+
+---
+
+## 6.10 Definition of Done
+
+> **TxQ** = Transaction Queue | **HA** = High Availability
+
+Clear, measurable criteria for each phase.
+
+### 6.10.1 Phase 1: Core Infrastructure
+
+| Criterion       | Measurement                                                | Target                       |
+| --------------- | ---------------------------------------------------------- | ---------------------------- |
+| SDK Integration | `cmake --build` succeeds with `-DXRPL_ENABLE_TELEMETRY=ON` | ✅ Compiles                  |
+| Runtime Toggle  | `enabled=0` produces zero overhead                         | <0.1% CPU difference         |
+| Span Creation   | Unit test creates and exports span                         | Span appears in Tempo        |
+| Configuration   | All config options parsed correctly                        | Config validation tests pass |
+| Documentation   | Developer guide exists                                     | PR approved                  |
+
+**Definition of Done**: All criteria met, PR merged, no regressions in CI.
+
+### 6.10.2 Phase 2: RPC Tracing
+
+| Criterion          | Measurement                        | Target                     |
+| ------------------ | ---------------------------------- | -------------------------- |
+| Coverage           | All RPC commands instrumented      | 100% of commands           |
+| Context Extraction | traceparent header propagates      | Integration test passes    |
+| Attributes         | Command, status, duration recorded | Validation script confirms |
+| Performance        | RPC latency overhead               | <1ms p99                   |
+| Dashboard          | Grafana dashboard deployed         | Screenshot in docs         |
+
+**Definition of Done**: RPC traces visible in Tempo for all commands, dashboard shows latency distribution.
+
+### 6.10.3 Phase 3: Transaction Tracing
+
+| Criterion        | Measurement                     | Target                             |
+| ---------------- | ------------------------------- | ---------------------------------- |
+| Local Trace      | Submit → validate → TxQ traced  | Single-node test passes            |
+| Cross-Node       | Context propagates via protobuf | Multi-node test passes             |
+| Relay Visibility | relay_count attribute correct   | Spot check 100 txs                 |
+| HashRouter       | Deduplication visible in trace  | Duplicate txs show suppressed=true |
+| Performance      | TX throughput overhead          | <5% degradation                    |
+
+**Definition of Done**: Transaction traces span 3+ nodes in test network, performance within bounds.
+
+### 6.10.4 Phase 4: Consensus Tracing
+
+| Criterion            | Measurement                   | Target                    |
+| -------------------- | ----------------------------- | ------------------------- |
+| Round Tracing        | startRound creates root span  | Unit test passes          |
+| Phase Visibility     | All phases have child spans   | Integration test confirms |
+| Proposer Attribution | Proposer ID in attributes     | Spot check 50 rounds      |
+| Timing Accuracy      | Phase durations match PerfLog | <5% variance              |
+| No Consensus Impact  | Round timing unchanged        | Performance test passes   |
+
+**Definition of Done**: Consensus rounds fully traceable, no impact on consensus timing.
+
+### 6.10.5 Phase 5: Production Deployment
+
+| Criterion    | Measurement                  | Target                     |
+| ------------ | ---------------------------- | -------------------------- |
+| Collector HA | Multiple collectors deployed | No single point of failure |
+| Sampling     | Tail sampling configured     | 10% base + errors + slow   |
+| Retention    | Data retained per policy     | 7 days hot, 30 days warm   |
+| Alerting     | Alerts configured            | Error spike, high latency  |
+| Runbook      | Operator documentation       | Approved by ops team       |
+| Training     | Team trained                 | Session completed          |
+
+**Definition of Done**: Telemetry running in production, operators trained, alerts active.
+
+### 6.10.6 Success Metrics Summary
+
+| Phase   | Primary Metric         | Secondary Metric            | Deadline      |
+| ------- | ---------------------- | --------------------------- | ------------- |
+| Phase 1 | SDK compiles and runs  | Zero overhead when disabled | End of Week 2 |
+| Phase 2 | 100% RPC coverage      | <1ms latency overhead       | End of Week 4 |
+| Phase 3 | Cross-node traces work | <5% throughput impact       | End of Week 6 |
+| Phase 4 | Consensus fully traced | No consensus timing impact  | End of Week 8 |
+| Phase 5 | Production deployment  | Operators trained           | End of Week 9 |
+
+---
+
+## 6.11 Recommended Implementation Order
+
+Based on ROI analysis, implement in this exact order:
+
+```mermaid
+flowchart TB
+    subgraph week1["Week 1"]
+        t1[1. OpenTelemetry SDK<br/>Conan/CMake integration]
+        t2[2. Telemetry interface<br/>SpanGuard, config]
+    end
+
+    subgraph week2["Week 2"]
+        t3[3. RPC ServerHandler<br/>instrumentation]
+        t4[4. Basic Tempo setup<br/>for testing]
+    end
+
+    subgraph week3["Week 3"]
+        t5[5. Transaction submit<br/>tracing]
+        t6[6. Grafana dashboard<br/>v1]
+    end
+
+    subgraph week4["Week 4"]
+        t7[7. Protobuf context<br/>extension]
+        t8[8. PeerImp tx.relay<br/>instrumentation]
+    end
+
+    subgraph week5["Week 5"]
+        t9[9. Multi-node<br/>integration tests]
+        t10[10. Performance<br/>benchmarks]
+    end
+
+    subgraph week6_8["Weeks 6-8"]
+        t11[11. Consensus<br/>instrumentation]
+        t12[12. Full integration<br/>testing]
+    end
+
+    subgraph week9["Week 9"]
+        t13[13. Production<br/>deployment]
+        t14[14. Documentation<br/>& training]
+    end
+
+    t1 --> t2 --> t3 --> t4
+    t4 --> t5 --> t6
+    t6 --> t7 --> t8
+    t8 --> t9 --> t10
+    t10 --> t11 --> t12
+    t12 --> t13 --> t14
+
+    style week1 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style week2 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style week3 fill:#bf360c,stroke:#8c2809,color:#fff
+    style week4 fill:#bf360c,stroke:#8c2809,color:#fff
+    style week5 fill:#bf360c,stroke:#8c2809,color:#fff
+    style week6_8 fill:#0d47a1,stroke:#082f6a,color:#fff
+    style week9 fill:#4a148c,stroke:#2e0d57,color:#fff
+    style t1 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style t2 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style t3 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style t4 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style t5 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style t6 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style t7 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style t8 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style t9 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style t10 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
+    style t11 fill:#0d47a1,stroke:#082f6a,color:#fff
+    style t12 fill:#0d47a1,stroke:#082f6a,color:#fff
+    style t13 fill:#4a148c,stroke:#2e0d57,color:#fff
+    style t14 fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+**Reading the diagram:**
+
+- **Week 1 (tasks 1-2)**: Foundation work -- integrate the OpenTelemetry SDK via Conan/CMake and build the `Telemetry` interface with `SpanGuard` and config parsing.
+- **Week 2 (tasks 3-4)**: First observable output -- instrument `ServerHandler` for RPC tracing and stand up Tempo so developers can see traces immediately.
+- **Weeks 3-5 (tasks 5-10)**: Transaction lifecycle -- add submit tracing, build the first Grafana dashboard, extend protobuf for cross-node context, instrument `PeerImp` relay, then validate with multi-node integration tests and performance benchmarks.
+- **Weeks 6-8 (tasks 11-12)**: Consensus deep-dive -- instrument consensus rounds and phases, then run full integration testing across all instrumented paths.
+- **Week 9 (tasks 13-14)**: Go-live -- deploy to production with sampling/alerting configured, and deliver documentation and operator training.
+- **Arrow chain (t1 → ... → t14)**: Strict sequential dependency; each task's output is a prerequisite for the next.
+
+---
+
+_Previous: [Configuration Reference](./05-configuration-reference.md)_ | _Next: [Observability Backends](./07-observability-backends.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/07-observability-backends.md
+++ b/OpenTelemetryPlan/07-observability-backends.md
@@ -0,0 +1,641 @@
+# Observability Backend Recommendations
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Related**: [Implementation Phases](./06-implementation-phases.md) | [Appendix](./08-appendix.md)
+
+---
+
+## 7.1 Development/Testing Backends
+
+> **OTLP** = OpenTelemetry Protocol
+
+| Backend    | Pros                                | Cons                   | Use Case            |
+| ---------- | ----------------------------------- | ---------------------- | ------------------- |
+| **Tempo**  | Cost-effective, Grafana integration | Requires Grafana stack | Local dev, CI, Prod |
+| **Zipkin** | Simple, lightweight                 | Basic features         | Quick prototyping   |
+
+### Quick Start with Tempo
+
+```bash
+# Start Tempo with OTLP support
+docker run -d --name tempo \
+  -p 3200:3200 \
+  -p 4317:4317 \
+  -p 4318:4318 \
+  grafana/tempo:2.6.1
+```
+
+---
+
+## 7.2 Production Backends
+
+> **APM** = Application Performance Monitoring
+
+| Backend           | Pros                                      | Cons                   | Use Case                    |
+| ----------------- | ----------------------------------------- | ---------------------- | --------------------------- |
+| **Grafana Tempo** | Cost-effective, Grafana integration       | Requires Grafana stack | Most production deployments |
+| **Elastic APM**   | Full observability stack, log correlation | Resource intensive     | Existing Elastic users      |
+| **Honeycomb**     | Excellent query, high cardinality         | SaaS cost              | Deep debugging needs        |
+| **Datadog APM**   | Full platform, easy setup                 | SaaS cost              | Enterprise with budget      |
+
+### Backend Selection Flowchart
+
+```mermaid
+flowchart TD
+    start[Select Backend] --> budget{Budget<br/>Constraints?}
+
+    budget -->|Yes| oss[Open Source]
+    budget -->|No| saas{Prefer<br/>SaaS?}
+
+    oss --> existing{Existing<br/>Stack?}
+    existing -->|Grafana| tempo[Grafana Tempo]
+    existing -->|Elastic| elastic[Elastic APM]
+    existing -->|None| tempo
+
+    saas -->|Yes| enterprise{Enterprise<br/>Support?}
+    saas -->|No| oss
+
+    enterprise -->|Yes| datadog[Datadog APM]
+    enterprise -->|No| honeycomb[Honeycomb]
+
+    tempo --> final[Configure Collector]
+    elastic --> final
+    honeycomb --> final
+    datadog --> final
+
+    style start fill:#0f172a,stroke:#020617,color:#fff
+    style budget fill:#334155,stroke:#1e293b,color:#fff
+    style oss fill:#1e293b,stroke:#0f172a,color:#fff
+    style existing fill:#334155,stroke:#1e293b,color:#fff
+    style saas fill:#334155,stroke:#1e293b,color:#fff
+    style enterprise fill:#334155,stroke:#1e293b,color:#fff
+    style final fill:#0f172a,stroke:#020617,color:#fff
+    style tempo fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style elastic fill:#bf360c,stroke:#8c2809,color:#fff
+    style honeycomb fill:#0d47a1,stroke:#082f6a,color:#fff
+    style datadog fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+**Reading the diagram:**
+
+- **Budget Constraints? (Yes)**: Leads to open-source options. If you already run Grafana or Elastic, pick the matching backend; otherwise default to Grafana Tempo.
+- **Budget Constraints? (No) → Prefer SaaS?**: If you want a managed service, choose between Datadog (enterprise support) and Honeycomb (developer-focused). If not, fall back to open-source.
+- **Terminal nodes (Tempo / Elastic / Honeycomb / Datadog)**: Each represents a concrete backend choice, all of which feed into the same final step.
+- **Configure Collector**: Regardless of backend, you always finish by configuring the OTel Collector to export to your chosen destination.
+
+---
+
+## 7.3 Recommended Production Architecture
+
+> **OTLP** = OpenTelemetry Protocol | **APM** = Application Performance Monitoring | **HA** = High Availability
+
+```mermaid
+flowchart TB
+    subgraph validators["Validator Nodes"]
+        v1[rippled<br/>Validator 1]
+        v2[rippled<br/>Validator 2]
+    end
+
+    subgraph stock["Stock Nodes"]
+        s1[rippled<br/>Stock 1]
+        s2[rippled<br/>Stock 2]
+    end
+
+    subgraph collector["OTel Collector Cluster"]
+        c1[Collector<br/>DC1]
+        c2[Collector<br/>DC2]
+    end
+
+    subgraph backends["Storage Backends"]
+        tempo[(Grafana<br/>Tempo)]
+        elastic[(Elastic<br/>APM)]
+        archive[(S3/GCS<br/>Archive)]
+    end
+
+    subgraph ui["Visualization"]
+        grafana[Grafana<br/>Dashboards]
+    end
+
+    v1 -->|OTLP| c1
+    v2 -->|OTLP| c1
+    s1 -->|OTLP| c2
+    s2 -->|OTLP| c2
+
+    c1 --> tempo
+    c1 --> elastic
+    c2 --> tempo
+    c2 --> archive
+
+    tempo --> grafana
+    elastic --> grafana
+
+    %% Note: simplified single-collector-per-DC topology shown for clarity
+
+    style validators fill:#b71c1c,stroke:#7f1d1d,color:#ffffff
+    style stock fill:#0d47a1,stroke:#082f6a,color:#ffffff
+    style collector fill:#bf360c,stroke:#8c2809,color:#ffffff
+    style backends fill:#1b5e20,stroke:#0d3d14,color:#ffffff
+    style ui fill:#4a148c,stroke:#2e0d57,color:#ffffff
+```
+
+**Reading the diagram:**
+
+- **Validator / Stock Nodes**: All rippled nodes emit trace data via OTLP. Validators and stock nodes are grouped separately because they may reside in different network zones.
+- **Collector Cluster (DC1, DC2)**: Regional collectors receive OTLP from nodes in their datacenter, apply processing (sampling, enrichment), and fan out to multiple backends.
+- **Storage Backends**: Tempo and Elastic provide queryable trace storage; S3/GCS Archive provides long-term cold storage for compliance or post-incident analysis.
+- **Grafana Dashboards**: The single visualization layer that queries both Tempo and Elastic, giving operators a unified view of all traces.
+- **Data flow direction**: Nodes → Collectors → Storage → Grafana. Each arrow represents a network hop; minimizing collector-to-backend hops reduces latency.
+
+> **Note**: Production deployments should use multiple collector instances behind a load balancer for high availability. The diagram shows a simplified topology with a single collector per datacenter for clarity.
+
+---
+
+## 7.4 Architecture Considerations
+
+### 7.4.1 Collector Placement
+
+| Strategy      | Description          | Pros                     | Cons                    |
+| ------------- | -------------------- | ------------------------ | ----------------------- |
+| **Sidecar**   | Collector per node   | Isolation, simple config | Resource overhead       |
+| **DaemonSet** | Collector per host   | Shared resources         | Complexity              |
+| **Gateway**   | Central collector(s) | Centralized processing   | Single point of failure |
+
+**Recommendation**: Use the **Gateway** pattern with regional collectors for rippled networks:
+
+- One collector cluster per datacenter/region
+- Tail-based sampling at collector level
+- Multiple export destinations for redundancy
+
+### 7.4.2 Sampling Strategy
+
+```mermaid
+flowchart LR
+    subgraph head["Head Sampling (Node)"]
+        hs[Node-level head sampling<br/>configurable, default: 100%<br/>recommended production: 10%]
+    end
+
+    subgraph tail["Tail Sampling (Collector)"]
+        ts1[Keep all errors]
+        ts2[Keep slow >5s]
+        ts3[Keep 10% rest]
+    end
+
+    head --> tail
+
+    ts1 --> final[Final Traces]
+    ts2 --> final
+    ts3 --> final
+
+    style head fill:#0d47a1,stroke:#082f6a,color:#fff
+    style tail fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style hs fill:#0d47a1,stroke:#082f6a,color:#fff
+    style ts1 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style ts2 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style ts3 fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style final fill:#bf360c,stroke:#8c2809,color:#fff
+```
+
+**Reading the diagram:**
+
+- **Head Sampling (Node)**: The first filter -- each rippled node decides whether to sample a trace at creation time (default 100%, recommended 10% in production). This controls the volume leaving the node.
+- **Tail Sampling (Collector)**: The second filter -- the collector inspects completed traces and applies rules: keep all errors, keep anything slower than 5 seconds, and keep 10% of the remainder.
+- **Arrow head → tail**: All head-sampled traces flow to the collector, where tail sampling further reduces volume while preserving the most valuable data.
+- **Final Traces**: The output after both sampling stages; this is what gets stored and queried. The two-stage approach balances cost with debuggability.
+
+### 7.4.3 Data Retention
+
+| Environment | Hot Storage | Warm Storage | Cold Archive |
+| ----------- | ----------- | ------------ | ------------ |
+| Development | 24 hours    | N/A          | N/A          |
+| Staging     | 7 days      | N/A          | N/A          |
+| Production  | 7 days      | 30 days      | many years   |
+
+---
+
+## 7.5 Integration Checklist
+
+- [ ] Choose primary backend (Tempo recommended for cost/features)
+- [ ] Deploy collector cluster with high availability
+- [ ] Configure tail-based sampling for error/latency traces
+- [ ] Set up Grafana dashboards for trace visualization
+- [ ] Configure alerts for trace anomalies
+- [ ] Establish data retention policies
+- [ ] Test trace correlation with logs and metrics
+
+---
+
+## 7.6 Grafana Dashboard Examples
+
+Pre-built dashboards for rippled observability.
+
+### 7.6.1 Consensus Health Dashboard
+
+```json
+{
+  "title": "rippled Consensus Health",
+  "uid": "rippled-consensus-health",
+  "tags": ["rippled", "consensus", "tracing"],
+  "panels": [
+    {
+      "title": "Consensus Round Duration",
+      "type": "timeseries",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && name=\"consensus.round\"} | avg(duration) by (resource.service.instance.id)"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 4000 },
+              { "color": "red", "value": 5000 }
+            ]
+          }
+        }
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }
+    },
+    {
+      "title": "Phase Duration Breakdown",
+      "type": "barchart",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && name=~\"consensus.phase.*\"} | avg(duration) by (name)"
+        }
+      ],
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }
+    },
+    {
+      "title": "Proposers per Round",
+      "type": "stat",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && name=\"consensus.round\"} | avg(span.xrpl.consensus.proposers)"
+        }
+      ],
+      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 8 }
+    },
+    {
+      "title": "Recent Slow Rounds (>5s)",
+      "type": "table",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && name=\"consensus.round\"} | duration > 5s"
+        }
+      ],
+      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 12 }
+    }
+  ]
+}
+```
+
+### 7.6.2 Node Overview Dashboard
+
+```json
+{
+  "title": "rippled Node Overview",
+  "uid": "rippled-node-overview",
+  "panels": [
+    {
+      "title": "Active Nodes",
+      "type": "stat",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\"} | count_over_time() by (resource.service.instance.id) | count()"
+        }
+      ],
+      "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }
+    },
+    {
+      "title": "Total Transactions (1h)",
+      "type": "stat",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && name=\"tx.receive\"} | count()"
+        }
+      ],
+      "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }
+    },
+    {
+      "title": "Error Rate",
+      "type": "gauge",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && status.code=error} | rate() / {resource.service.name=\"rippled\"} | rate() * 100"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent",
+          "max": 10,
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 1 },
+              { "color": "red", "value": 5 }
+            ]
+          }
+        }
+      },
+      "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }
+    },
+    {
+      "title": "Service Map",
+      "type": "nodeGraph",
+      "datasource": "Tempo",
+      "gridPos": { "h": 12, "w": 12, "x": 12, "y": 0 }
+    }
+  ]
+}
+```
+
+### 7.6.3 Alert Rules
+
+```yaml
+# grafana/provisioning/alerting/rippled-alerts.yaml
+apiVersion: 1
+
+groups:
+  - name: rippled-tracing-alerts
+    folder: rippled
+    interval: 1m
+    rules:
+      - uid: consensus-slow
+        title: Consensus Round Slow
+        condition: A
+        data:
+          - refId: A
+            datasourceUid: tempo
+            model:
+              queryType: traceql
+              query: '{resource.service.name="rippled" && name="consensus.round"} | avg(duration) > 5s'
+              # Note: Verify TraceQL aggregate queries are supported by your
+              # Tempo version. Aggregate alerting (e.g., avg(duration)) requires
+              # Tempo 2.3+ with TraceQL metrics enabled.
+        for: 5m
+        annotations:
+          summary: Consensus rounds taking >5 seconds
+          description: "Consensus duration: {{ $value }}ms"
+        labels:
+          severity: warning
+
+      - uid: rpc-error-spike
+        title: RPC Error Rate Spike
+        condition: B
+        data:
+          - refId: B
+            datasourceUid: tempo
+            model:
+              queryType: traceql
+              query: '{resource.service.name="rippled" && name=~"rpc.command.*" && status.code=error} | rate() > 0.05'
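+              # Note: Verify TraceQL aggregate queries are supported by your
+              # Tempo version. Aggregate alerting (e.g., rate()) requires
+              # Tempo 2.3+ with TraceQL metrics enabled.
+              # rate() returns matching spans per second, so this threshold is
+              # an absolute error rate (0.05 errors/s), not a percentage of
+              # all RPC spans.
+        for: 2m
+        annotations:
+          summary: RPC error rate >0.05 errors/s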
+        labels:
+          severity: critical
+
+      - uid: tx-throughput-drop
+        title: Transaction Throughput Drop
+        condition: C
+        data:
+          - refId: C
+            datasourceUid: tempo
+            model:
+              queryType: traceql
+              query: '{resource.service.name="rippled" && name="tx.receive"} | rate() < 10'
+        for: 10m
+        annotations:
+          summary: Transaction throughput below threshold
+        labels:
+          severity: warning
+```
+
+---
+
+## 7.7 PerfLog and Insight Correlation
+
+> **OTLP** = OpenTelemetry Protocol
+
+How to correlate OpenTelemetry traces with existing rippled observability.
+
+### 7.7.1 Correlation Architecture
+
+```mermaid
+flowchart TB
+    subgraph rippled["rippled Node"]
+        otel[OpenTelemetry<br/>Spans]
+        perflog[PerfLog<br/>JSON Logs]
+        insight[Beast Insight<br/>StatsD Metrics]
+    end
+
+    subgraph collectors["Data Collection"]
+        otelc[OTel Collector]
+        promtail[Promtail/Fluentd]
+        statsd[StatsD Exporter]
+    end
+
+    subgraph storage["Storage"]
+        tempo[(Tempo)]
+        loki[(Loki)]
+        prom[(Prometheus)]
+    end
+
+    subgraph grafana["Grafana"]
+        traces[Trace View]
+        logs[Log View]
+        metrics[Metrics View]
+        corr[Correlation<br/>Panel]
+    end
+
+    otel -->|OTLP| otelc --> tempo
+    perflog -->|JSON| promtail --> loki
+    insight -->|StatsD| statsd --> prom
+
+    tempo --> traces
+    loki --> logs
+    prom --> metrics
+
+    traces --> corr
+    logs --> corr
+    metrics --> corr
+
+    style rippled fill:#0d47a1,stroke:#082f6a,color:#fff
+    style collectors fill:#bf360c,stroke:#8c2809,color:#fff
+    style storage fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style grafana fill:#4a148c,stroke:#2e0d57,color:#fff
+    style otel fill:#0d47a1,stroke:#082f6a,color:#fff
+    style perflog fill:#0d47a1,stroke:#082f6a,color:#fff
+    style insight fill:#0d47a1,stroke:#082f6a,color:#fff
+    style otelc fill:#bf360c,stroke:#8c2809,color:#fff
+    style promtail fill:#bf360c,stroke:#8c2809,color:#fff
+    style statsd fill:#bf360c,stroke:#8c2809,color:#fff
+    style tempo fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style loki fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style prom fill:#1b5e20,stroke:#0d3d14,color:#fff
+    style traces fill:#4a148c,stroke:#2e0d57,color:#fff
+    style logs fill:#4a148c,stroke:#2e0d57,color:#fff
+    style metrics fill:#4a148c,stroke:#2e0d57,color:#fff
+    style corr fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+**Reading the diagram:**
+
+- **rippled Node (three sources)**: A single node emits three independent data streams -- OpenTelemetry spans, PerfLog JSON logs, and Beast Insight StatsD metrics.
+- **Data Collection layer**: Each stream has its own collector -- OTel Collector for spans, Promtail/Fluentd for logs, and a StatsD exporter for metrics. They operate independently.
+- **Storage layer (Tempo, Loki, Prometheus)**: Each data type lands in a purpose-built store optimized for its query patterns (trace search, log grep, metric aggregation).
+- **Grafana Correlation Panel**: The key integration point -- Grafana queries all three stores and links them via shared fields (`trace_id`, `xrpl.tx.hash`, `ledger_seq`), enabling a single-pane debugging experience.
+
+### 7.7.2 Correlation Fields
+
+| Source      | Field                       | Link To       | Purpose                    |
+| ----------- | --------------------------- | ------------- | -------------------------- |
+| **Trace**   | `trace_id`                  | Logs          | Find log entries for trace |
+| **Trace**   | `xrpl.tx.hash`              | Logs, Metrics | Find TX-related data       |
+| **Trace**   | `xrpl.consensus.ledger.seq` | Logs          | Find ledger-related logs   |
+| **PerfLog** | `trace_id` (new)            | Traces        | Jump to trace from log     |
+| **PerfLog** | `ledger_seq`                | Traces        | Find consensus trace       |
+| **Insight** | `exemplar.trace_id`         | Traces        | Jump from metric spike     |
+
+### 7.7.3 Example: Debugging a Slow Transaction
+
+**Step 1: Find the trace**
+
+```
+# In Grafana Explore with Tempo
+{resource.service.name="rippled" && span.xrpl.tx.hash="ABC123..."}
+```
+
+**Step 2: Get the trace_id from the trace view**
+
+```
+Trace ID: 4bf92f3577b34da6a3ce929d0e0e4736
+```
+
+**Step 3: Find related PerfLog entries**
+
+```
+# In Grafana Explore with Loki
+{job="rippled"} |= "4bf92f3577b34da6a3ce929d0e0e4736"
+```
+
+**Step 4: Check Insight metrics for the time window**
+
+```
+# In Grafana with Prometheus
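+# The @ modifier attaches to the selector and takes a Unix timestamp (seconds);
+# substitute the trace's start time for <trace_start_unix_ts>.
+rate(rippled_tx_applied_total[1m] @ <trace_start_unix_ts>)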
+```
+
+### 7.7.4 Unified Dashboard Example
+
+```json
+{
+  "title": "rippled Unified Observability",
+  "uid": "rippled-unified",
+  "panels": [
+    {
+      "title": "Transaction Latency (Traces)",
+      "type": "timeseries",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\" && name=\"tx.receive\"} | histogram_over_time(duration)"
+        }
+      ],
+      "gridPos": { "h": 6, "w": 8, "x": 0, "y": 0 }
+    },
+    {
+      "title": "Transaction Rate (Metrics)",
+      "type": "timeseries",
+      "datasource": "Prometheus",
+      "targets": [
+        {
+          "expr": "rate(rippled_tx_received_total[5m])",
+          "legendFormat": "{{ instance }}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "links": [
+            {
+              "title": "View traces",
+              "url": "/explore?left={\"datasource\":\"Tempo\",\"query\":\"{resource.service.name=\\\"rippled\\\" && name=\\\"tx.receive\\\"}\"}"
+            }
+          ]
+        }
+      },
+      "gridPos": { "h": 6, "w": 8, "x": 8, "y": 0 }
+    },
+    {
+      "title": "Recent Logs",
+      "type": "logs",
+      "datasource": "Loki",
+      "targets": [
+        {
+          "expr": "{job=\"rippled\"} | json"
+        }
+      ],
+      "gridPos": { "h": 6, "w": 8, "x": 16, "y": 0 }
+    },
+    {
+      "title": "Trace Search",
+      "type": "table",
+      "datasource": "Tempo",
+      "targets": [
+        {
+          "queryType": "traceql",
+          "query": "{resource.service.name=\"rippled\"}"
+        }
+      ],
+      "fieldConfig": {
+        "overrides": [
+          {
+            "matcher": { "id": "byName", "options": "traceID" },
+            "properties": [
+              {
+                "id": "links",
+                "value": [
+                  {
+                    "title": "View trace",
+                    "url": "/explore?left={\"datasource\":\"Tempo\",\"query\":\"${__value.raw}\"}"
+                  },
+                  {
+                    "title": "View logs",
+                    "url": "/explore?left={\"datasource\":\"Loki\",\"query\":\"{job=\\\"rippled\\\"} |= \\\"${__value.raw}\\\"\"}"
+                  }
+                ]
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": { "h": 12, "w": 24, "x": 0, "y": 6 }
+    }
+  ]
+}
+```
+
+---
+
+_Previous: [Implementation Phases](./06-implementation-phases.md)_ | _Next: [Appendix](./08-appendix.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/08-appendix.md
+++ b/OpenTelemetryPlan/08-appendix.md
@@ -0,0 +1,200 @@
+# Appendix
+
+> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
+> **Related**: [Observability Backends](./07-observability-backends.md)
+
+---
+
+## 8.1 Glossary
+
+> **OTLP** = OpenTelemetry Protocol | **TxQ** = Transaction Queue
+
+| Term                  | Definition                                                 |
+| --------------------- | ---------------------------------------------------------- |
+| **Span**              | A unit of work with start/end time, name, and attributes   |
+| **Trace**             | A collection of spans representing a complete request flow |
+| **Trace ID**          | 128-bit unique identifier for a trace                      |
+| **Span ID**           | 64-bit unique identifier for a span within a trace         |
+| **Context**           | Carrier for trace/span IDs across boundaries               |
+| **Propagator**        | Component that injects/extracts context                    |
+| **Sampler**           | Decides which traces to record                             |
+| **Exporter**          | Sends spans to backend                                     |
+| **Collector**         | Receives, processes, and forwards telemetry                |
+| **OTLP**              | OpenTelemetry Protocol (wire format)                       |
+| **W3C Trace Context** | Standard HTTP headers for trace propagation                |
+| **Baggage**           | Key-value pairs propagated across service boundaries       |
+| **Resource**          | Entity producing telemetry (service, host, etc.)           |
+| **Instrumentation**   | Code that creates telemetry data                           |
+
+### rippled-Specific Terms
+
+| Term              | Definition                                                    |
+| ----------------- | ------------------------------------------------------------- |
+| **Overlay**       | P2P network layer managing peer connections                   |
+| **Consensus**     | XRP Ledger consensus algorithm (RCL)                          |
+| **Proposal**      | Validator's suggested transaction set for a ledger            |
+| **Validation**    | Validator's signature on a closed ledger                      |
+| **HashRouter**    | Component for transaction deduplication                       |
+| **JobQueue**      | Thread pool for asynchronous task execution                   |
+| **PerfLog**       | Existing performance logging system in rippled                |
+| **Beast Insight** | Existing metrics framework in rippled                         |
+| **PathFinding**   | Payment path computation engine for cross-currency payments   |
+| **TxQ**           | Transaction queue managing fee-based prioritization           |
+| **LoadManager**   | Dynamic fee escalation based on network load                  |
+| **SHAMap**        | SHA-512Half hash-based Merkle radix tree for ledger state     |
+
+---
+
+## 8.2 Span Hierarchy Visualization
+
+> **TxQ** = Transaction Queue
+
+```mermaid
+flowchart TB
+    subgraph trace["Trace: Transaction Lifecycle"]
+        rpc["rpc.request<br/>(entry point)"]
+        validate["tx.validate"]
+        relay["tx.relay<br/>(parent span)"]
+
+        subgraph peers["Peer Spans"]
+            p1["peer.send<br/>Peer A"]
+            p2["peer.send<br/>Peer B"]
+            p3["peer.send<br/>Peer C"]
+        end
+
+        subgraph pathfinding["PathFinding Spans"]
+            pathfind["pathfind.request"]
+            pathcomp["pathfind.compute"]
+        end
+
+        consensus["consensus.round"]
+        apply["tx.apply"]
+
+        subgraph txqueue["TxQ Spans"]
+            txq["txq.enqueue"]
+            txqApply["txq.apply"]
+        end
+
+        feeCalc["fee.escalate"]
+    end
+
+    subgraph validators["Validator Spans"]
+        valFetch["validator.list.fetch"]
+        valManifest["validator.manifest"]
+    end
+
+    rpc --> validate
+    rpc --> pathfind
+    pathfind --> pathcomp
+    validate --> relay
+    relay --> p1
+    relay --> p2
+    relay --> p3
+    p1 -.->|"context propagation"| consensus
+    consensus --> apply
+    apply --> txq
+    txq --> txqApply
+    txq --> feeCalc
+
+    style trace fill:#0f172a,stroke:#020617,color:#fff
+    style peers fill:#1e3a8a,stroke:#172554,color:#fff
+    style pathfinding fill:#134e4a,stroke:#0f766e,color:#fff
+    style txqueue fill:#064e3b,stroke:#047857,color:#fff
+    style validators fill:#4c1d95,stroke:#6d28d9,color:#fff
+    style rpc fill:#1d4ed8,stroke:#1e40af,color:#fff
+    style validate fill:#047857,stroke:#064e3b,color:#fff
+    style relay fill:#047857,stroke:#064e3b,color:#fff
+    style p1 fill:#0e7490,stroke:#155e75,color:#fff
+    style p2 fill:#0e7490,stroke:#155e75,color:#fff
+    style p3 fill:#0e7490,stroke:#155e75,color:#fff
+    style consensus fill:#fef3c7,stroke:#fde68a,color:#1e293b
+    style apply fill:#047857,stroke:#064e3b,color:#fff
+    style pathfind fill:#0e7490,stroke:#155e75,color:#fff
+    style pathcomp fill:#0e7490,stroke:#155e75,color:#fff
+    style txq fill:#047857,stroke:#064e3b,color:#fff
+    style txqApply fill:#047857,stroke:#064e3b,color:#fff
+    style feeCalc fill:#047857,stroke:#064e3b,color:#fff
+    style valFetch fill:#6d28d9,stroke:#4c1d95,color:#fff
+    style valManifest fill:#6d28d9,stroke:#4c1d95,color:#fff
+```
+
+**Reading the diagram:**
+
+- **rpc.request (blue, top)**: The entry point — every traced transaction starts as an RPC call; this root span is the parent of all downstream work.
+- **tx.validate and pathfind.request (green/teal, first fork)**: The RPC request fans out into transaction validation and, for cross-currency payments, a PathFinding branch (`pathfind.request` -> `pathfind.compute`).
+- **tx.relay -> Peer Spans (teal, middle)**: After validation, the transaction is relayed to peers A, B, and C in parallel; each `peer.send` is a sibling child span showing fan-out across the network.
+- **context propagation (dashed arrow)**: The dashed arrow from the Peer A `peer.send` span to `consensus.round` represents the trace context crossing a node boundary — the receiving validator picks up the same `trace_id` and continues the trace.
+- **consensus.round -> tx.apply -> TxQ Spans (amber/green, lower)**: Once consensus accepts the transaction, it is applied to the ledger; the TxQ spans (`txq.enqueue`, `txq.apply`) and the related `fee.escalate` span capture queue depth and fee escalation behavior.
+- **Validator Spans (purple, detached)**: `validator.list.fetch` and `validator.manifest` are independent workflows for UNL management — they run on their own traces and are linked to consensus via Span Links, not parent-child relationships.
+
+---
+
+## 8.3 References
+
+> **OTLP** = OpenTelemetry Protocol
+
+### OpenTelemetry Resources
+
+1. [OpenTelemetry C++ SDK](https://github.com/open-telemetry/opentelemetry-cpp)
+2. [OpenTelemetry Specification](https://opentelemetry.io/docs/specs/otel/)
+3. [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/)
+4. [OTLP Protocol Specification](https://opentelemetry.io/docs/specs/otlp/)
+
+### Standards
+
+5. [W3C Trace Context](https://www.w3.org/TR/trace-context/)
+6. [W3C Baggage](https://www.w3.org/TR/baggage/)
+7. [Protocol Buffers](https://protobuf.dev/)
+
+### rippled Resources
+
+8. [rippled Source Code](https://github.com/XRPLF/rippled)
+9. [XRP Ledger Documentation](https://xrpl.org/docs/)
+10. [rippled Overlay README](https://github.com/XRPLF/rippled/blob/develop/src/xrpld/overlay/README.md)
+11. [rippled RPC README](https://github.com/XRPLF/rippled/blob/develop/src/xrpld/rpc/README.md)
+12. [rippled Consensus README](https://github.com/XRPLF/rippled/blob/develop/src/xrpld/app/consensus/README.md)
+
+---
+
+## 8.4 Version History
+
+| Version | Date       | Author | Changes                                                        |
+| ------- | ---------- | ------ | -------------------------------------------------------------- |
+| 1.0     | 2026-02-12 | -      | Initial implementation plan                                    |
+| 1.1     | 2026-02-13 | -      | Refactored into modular documents                              |
+| 1.2     | 2026-03-24 | -      | Review fixes: accuracy corrections, cross-document consistency |
+
+---
+
+## 8.5 Document Index
+
+### Plan Documents
+
+| Document                                                         | Description                                  |
+| ---------------------------------------------------------------- | -------------------------------------------- |
+| [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)                   | Master overview and executive summary        |
+| [00-tracing-fundamentals.md](./00-tracing-fundamentals.md)       | Distributed tracing concepts and OTel primer |
+| [01-architecture-analysis.md](./01-architecture-analysis.md)     | rippled architecture and trace points        |
+| [02-design-decisions.md](./02-design-decisions.md)               | SDK selection, exporters, span conventions   |
+| [03-implementation-strategy.md](./03-implementation-strategy.md) | Directory structure, performance analysis    |
+| [04-code-samples.md](./04-code-samples.md)                       | C++ code examples for all components         |
+| [05-configuration-reference.md](./05-configuration-reference.md) | rippled config, CMake, Collector configs     |
+| [06-implementation-phases.md](./06-implementation-phases.md)     | Timeline, tasks, risks, success metrics      |
+| [07-observability-backends.md](./07-observability-backends.md)   | Backend selection and architecture           |
+| [08-appendix.md](./08-appendix.md)                               | Glossary, references, version history        |
+| [presentation.md](./presentation.md)                             | Slide deck for OTel plan overview            |
+
+### Task Lists
+
+| Document                                   | Description                                         |
+| ------------------------------------------ | --------------------------------------------------- |
+| [POC_taskList.md](./POC_taskList.md)       | Proof-of-concept telemetry integration              |
+| [Phase2_taskList.md](./Phase2_taskList.md) | RPC layer trace instrumentation                     |
+| [Phase3_taskList.md](./Phase3_taskList.md) | Transaction tracing & P2P context propagation       |
+| [Phase4_taskList.md](./Phase4_taskList.md) | Consensus tracing (rounds, proposals, validations)  |
+| [Phase5_taskList.md](./Phase5_taskList.md) | Ledger processing & advanced tracing                |
+
+---
+
+_Previous: [Observability Backends](./07-observability-backends.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
--- a/OpenTelemetryPlan/OpenTelemetryPlan.md
+++ b/OpenTelemetryPlan/OpenTelemetryPlan.md
@@ -0,0 +1,230 @@
+# [OpenTelemetry](00-tracing-fundamentals.md) Distributed Tracing Implementation Plan for rippled (xrpld)
+
+## Executive Summary
+
+> **OTLP** = OpenTelemetry Protocol
+
+This document provides a comprehensive implementation plan for integrating OpenTelemetry distributed tracing into the rippled XRP Ledger node software. The plan addresses the unique challenges of a decentralized peer-to-peer system where trace context must propagate across network boundaries between independent nodes.
+
+### Key Benefits
+
+- **End-to-end transaction visibility**: Track transactions from submission through consensus to ledger inclusion
+- **Consensus round analysis**: Understand timing and behavior of consensus phases across validators
+- **RPC performance insights**: Identify slow handlers and optimize response times
+- **Network topology understanding**: Visualize message propagation patterns between peers
+- **Incident debugging**: Correlate events across distributed nodes during issues
+
+### Estimated Performance Overhead
+
+| Metric        | Overhead   | Notes                               |
+| ------------- | ---------- | ----------------------------------- |
+| CPU           | 1-3%       | Span creation and attribute setting |
+| Memory        | 2-5 MB     | Batch buffer for pending spans      |
+| Network       | 10-50 KB/s | Compressed OTLP export to collector |
+| Latency (p99) | <2%        | With proper sampling configuration  |
+
+---
+
+## Document Structure
+
+This implementation plan is organized into modular documents for easier navigation:
+
+<div align="center">
+
+```mermaid
+flowchart TB
+    overview["📋 OpenTelemetryPlan.md<br/>(This Document)"]
+
+    subgraph fundamentals["Fundamentals"]
+        fund["00-tracing-fundamentals.md"]
+    end
+
+    subgraph analysis["Analysis & Design"]
+        arch["01-architecture-analysis.md"]
+        design["02-design-decisions.md"]
+    end
+
+    subgraph impl["Implementation"]
+        strategy["03-implementation-strategy.md"]
+        code["04-code-samples.md"]
+        config["05-configuration-reference.md"]
+    end
+
+    subgraph deploy["Deployment & Planning"]
+        phases["06-implementation-phases.md"]
+        backends["07-observability-backends.md"]
+        appendix["08-appendix.md"]
+        poc["POC_taskList.md"]
+    end
+
+    overview --> fundamentals
+    overview --> analysis
+    overview --> impl
+    overview --> deploy
+
+    fund --> arch
+    arch --> design
+    design --> strategy
+    strategy --> code
+    code --> config
+    config --> phases
+    phases --> backends
+    backends --> appendix
+    phases --> poc
+
+    style overview fill:#1b5e20,stroke:#0d3d14,color:#fff,stroke-width:2px
+    style fundamentals fill:#00695c,stroke:#004d40,color:#fff
+    style fund fill:#00695c,stroke:#004d40,color:#fff
+    style analysis fill:#0d47a1,stroke:#082f6a,color:#fff
+    style impl fill:#bf360c,stroke:#8c2809,color:#fff
+    style deploy fill:#4a148c,stroke:#2e0d57,color:#fff
+    style arch fill:#0d47a1,stroke:#082f6a,color:#fff
+    style design fill:#0d47a1,stroke:#082f6a,color:#fff
+    style strategy fill:#bf360c,stroke:#8c2809,color:#fff
+    style code fill:#bf360c,stroke:#8c2809,color:#fff
+    style config fill:#bf360c,stroke:#8c2809,color:#fff
+    style phases fill:#4a148c,stroke:#2e0d57,color:#fff
+    style backends fill:#4a148c,stroke:#2e0d57,color:#fff
+    style appendix fill:#4a148c,stroke:#2e0d57,color:#fff
+    style poc fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+</div>
+
+---
+
+## Table of Contents
+
+| Section | Document                                                   | Description                                                            |
+| ------- | ---------------------------------------------------------- | ---------------------------------------------------------------------- |
+| **0**   | [Tracing Fundamentals](./00-tracing-fundamentals.md)       | Distributed tracing concepts, span relationships, context propagation  |
+| **1**   | [Architecture Analysis](./01-architecture-analysis.md)     | rippled component analysis, trace points, instrumentation priorities   |
+| **2**   | [Design Decisions](./02-design-decisions.md)               | SDK selection, exporters, span naming, attributes, context propagation |
+| **3**   | [Implementation Strategy](./03-implementation-strategy.md) | Directory structure, key principles, performance optimization          |
+| **4**   | [Code Samples](./04-code-samples.md)                       | C++ implementation examples for core infrastructure and key modules    |
+| **5**   | [Configuration Reference](./05-configuration-reference.md) | rippled config, CMake integration, Collector configurations            |
+| **6**   | [Implementation Phases](./06-implementation-phases.md)     | 5-phase timeline, tasks, risks, success metrics                        |
+| **7**   | [Observability Backends](./07-observability-backends.md)   | Backend selection guide and production architecture                    |
+| **8**   | [Appendix](./08-appendix.md)                               | Glossary, references, version history                                  |
+| **POC** | [POC Task List](./POC_taskList.md)                         | Proof of concept tasks for RPC tracing end-to-end demo                 |
+
+---
+
+## 0. Tracing Fundamentals
+
+This document introduces distributed tracing concepts for readers unfamiliar with the domain. It covers what traces and spans are, how parent-child and follows-from relationships model causality, how context propagates across service boundaries, and how sampling controls data volume. It also maps these concepts to rippled-specific scenarios like transaction relay and consensus.
+
+➡️ **[Read Tracing Fundamentals](./00-tracing-fundamentals.md)**
+
+---
+
+## 1. Architecture Analysis
+
+> **WS** = WebSocket | **TxQ** = Transaction Queue
+
+The rippled node consists of several key components that require instrumentation for comprehensive distributed tracing. The main areas include the RPC server (HTTP/WebSocket), Overlay P2P network, Consensus mechanism (RCLConsensus), JobQueue for async task execution, PathFinding, Transaction Queue (TxQ), fee escalation (LoadManager), ledger acquisition, validator management, and existing observability infrastructure (PerfLog, Insight/StatsD, Journal logging).
+
+Key trace points span across transaction submission via RPC, peer-to-peer message propagation, consensus round execution, ledger building, path computation, transaction queue behavior, fee escalation, and validator health. The implementation prioritizes high-value, low-risk components first: RPC handlers provide immediate value with minimal risk, while consensus tracing requires careful implementation to avoid timing impacts.
+
+➡️ **[Read full Architecture Analysis](./01-architecture-analysis.md)**
+
+---
+
+## 2. Design Decisions
+
+> **OTLP** = OpenTelemetry Protocol | **CNCF** = Cloud Native Computing Foundation
+
+The OpenTelemetry C++ SDK is selected for its CNCF backing, active development, and native performance characteristics. Traces are exported via OTLP/gRPC (primary) or OTLP/HTTP (fallback) to an OpenTelemetry Collector, which provides flexible routing and sampling.
+
+Span naming follows a hierarchical `<component>.<operation>` convention (e.g., `rpc.submit`, `tx.relay`, `consensus.round`). Context propagation uses W3C Trace Context headers for HTTP and embedded Protocol Buffer fields for P2P messages. The implementation coexists with existing PerfLog and Insight observability systems through correlation IDs.
+
+**Data Collection & Privacy**: Telemetry collects only operational metadata (timing, counts, hashes) — never sensitive content (private keys, balances, amounts, raw payloads). Privacy protection includes account hashing, configurable redaction, sampling, and collector-level filtering. Node operators retain full control over telemetry configuration.
+
+➡️ **[Read full Design Decisions](./02-design-decisions.md)**
+
+---
+
+## 3. Implementation Strategy
+
+The telemetry code is organized under `include/xrpl/telemetry/` for headers and `src/libxrpl/telemetry/` for implementation. Key principles include RAII-based span management via `SpanGuard`, conditional compilation with `XRPL_ENABLE_TELEMETRY`, and minimal runtime overhead through batch processing and efficient sampling.
+
+Performance optimization strategies include probabilistic head sampling (10% default), tail-based sampling at the collector for errors and slow traces, batch export to reduce network overhead, and conditional instrumentation that compiles to no-ops when disabled.
+
+➡️ **[Read full Implementation Strategy](./03-implementation-strategy.md)**
+
+---
+
+## 4. Code Samples
+
+C++ implementation examples are provided for the core telemetry infrastructure and key modules:
+
+- `Telemetry.h` - Core interface for tracer access and span creation
+- `SpanGuard.h` - RAII wrapper for automatic span lifecycle management
+- `TracingInstrumentation.h` - Macros for conditional instrumentation
+- Protocol Buffer extensions for trace context propagation
+- Module-specific instrumentation (RPC, Consensus, P2P, JobQueue)
+- Remaining modules (PathFinding, TxQ, Validator, etc.) follow the same patterns
+
+➡️ **[View all Code Samples](./04-code-samples.md)**
+
+---
+
+## 5. Configuration Reference
+
+> **OTLP** = OpenTelemetry Protocol | **APM** = Application Performance Monitoring
+
+Configuration is handled through the `[telemetry]` section in `xrpld.cfg` with options for enabling/disabling, exporter selection, endpoint configuration, sampling ratios, and component-level filtering. CMake integration includes a `XRPL_ENABLE_TELEMETRY` option for compile-time control.
+
+OpenTelemetry Collector configurations are provided for development and production (with tail-based sampling, Tempo, and Elastic APM). Docker Compose examples enable quick local development environment setup.
+
+➡️ **[View full Configuration Reference](./05-configuration-reference.md)**
+
+---
+
+## 6. Implementation Phases
+
+The implementation spans 9 weeks across 5 phases:
+
+| Phase | Duration  | Focus               | Key Deliverables                                    |
+| ----- | --------- | ------------------- | --------------------------------------------------- |
+| 1     | Weeks 1-2 | Core Infrastructure | SDK integration, Telemetry interface, Configuration |
+| 2     | Weeks 3-4 | RPC Tracing         | HTTP context extraction, Handler instrumentation    |
+| 3     | Weeks 5-6 | Transaction Tracing | Protocol Buffer context, Relay propagation          |
+| 4     | Weeks 7-8 | Consensus Tracing   | Round spans, Proposal/validation tracing            |
+| 5     | Week 9    | Documentation       | Runbook, Dashboards, Training                       |
+
+**Total Effort**: 47 person-days (2 developers working in parallel)
+
+➡️ **[View full Implementation Phases](./06-implementation-phases.md)**
+
+---
+
+## 7. Observability Backends
+
+> **APM** = Application Performance Monitoring | **GCS** = Google Cloud Storage
+
+Grafana Tempo is recommended for all environments due to its cost-effectiveness and Grafana integration, while Elastic APM is ideal for organizations with existing Elastic infrastructure.
+
+The recommended production architecture uses a gateway collector pattern with regional collectors performing tail-based sampling, routing traces to multiple backends (Tempo for primary storage, Elastic for log correlation, S3/GCS for long-term archive).
+
+➡️ **[View Observability Backend Recommendations](./07-observability-backends.md)**
+
+---
+
+## 8. Appendix
+
+The appendix contains a glossary of OpenTelemetry and rippled-specific terms, references to external documentation and specifications, version history for this implementation plan, and a complete document index.
+
+➡️ **[View Appendix](./08-appendix.md)**
+
+---
+
+## POC Task List
+
+A step-by-step task list for building a minimal end-to-end proof of concept that demonstrates distributed tracing in rippled. The POC scope is limited to RPC tracing — showing request traces flowing from rippled through an OpenTelemetry Collector into Tempo, viewable in Grafana.
+
+➡️ **[View POC Task List](./POC_taskList.md)**
+
+---
+
+_This document provides a comprehensive implementation plan for integrating OpenTelemetry distributed tracing into the rippled XRP Ledger node software. For detailed information on any section, follow the links to the corresponding sub-documents._
--- a/OpenTelemetryPlan/POC_taskList.md
+++ b/OpenTelemetryPlan/POC_taskList.md
@@ -0,0 +1,620 @@
+# OpenTelemetry POC Task List
+
+> **Goal**: Build a minimal end-to-end proof of concept that demonstrates distributed tracing in rippled. A successful POC will show RPC request traces flowing from rippled through an OTel Collector into Tempo, viewable in Grafana.
+>
+> **Scope**: RPC tracing only (highest value, lowest risk per the [CRAWL phase](./06-implementation-phases.md#6102-quick-wins-immediate-value) in the implementation phases). No cross-node P2P context propagation or consensus tracing in the POC.
+
+### Related Plan Documents
+
+| Document                                                         | Relevance to POC                                                                                                                                          |
+| ---------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [00-tracing-fundamentals.md](./00-tracing-fundamentals.md)       | Core concepts: traces, spans, context propagation, sampling                                                                                               |
+| [01-architecture-analysis.md](./01-architecture-analysis.md)     | RPC request flow (§1.5), key trace points (§1.6), instrumentation priority (§1.7)                                                                         |
+| [02-design-decisions.md](./02-design-decisions.md)               | SDK selection (§2.1), exporter config (§2.2), span naming (§2.3), attribute schema (§2.4), coexistence with PerfLog/Insight (§2.6)                        |
+| [03-implementation-strategy.md](./03-implementation-strategy.md) | Directory structure (§3.1), key principles (§3.2), performance overhead (§3.3-3.6), conditional compilation (§3.7.3), code intrusiveness (§3.9)           |
+| [04-code-samples.md](./04-code-samples.md)                       | Telemetry interface (§4.1), SpanGuard (§4.2), macros (§4.3), RPC instrumentation (§4.5.3)                                                                 |
+| [05-configuration-reference.md](./05-configuration-reference.md) | rippled config (§5.1), config parser (§5.2), Application integration (§5.3), CMake (§5.4), Collector config (§5.5), Docker Compose (§5.6), Grafana (§5.8) |
+| [06-implementation-phases.md](./06-implementation-phases.md)     | Phase 1 core tasks (§6.2), Phase 2 RPC tasks (§6.3), quick wins (§6.10), definition of done (§6.11)                                                       |
+| [07-observability-backends.md](./07-observability-backends.md)   | Tempo dev setup (§7.1), Grafana dashboards (§7.6), alert rules (§7.6.3)                                                                                   |
+
+---
+
+## Task 0: Docker Observability Stack Setup
+
+> **OTLP** = OpenTelemetry Protocol
+
+**Objective**: Stand up the backend infrastructure to receive, store, and display traces.
+
+**What to do**:
+
+- Create `docker/telemetry/docker-compose.yml` in the repo with three services:
+  1. **OpenTelemetry Collector** (`otel/opentelemetry-collector-contrib:0.92.0`)
+     - Expose ports `4317` (OTLP gRPC) and `4318` (OTLP HTTP)
+     - Expose port `13133` (health check)
+     - Mount a config file `docker/telemetry/otel-collector-config.yaml`
+  2. **Tempo** (`grafana/tempo:2.6.1`)
+     - Expose port `3200` (HTTP API) and `4317` (OTLP gRPC, internal)
+  3. **Grafana** (`grafana/grafana:latest`) — optional but useful
+     - Expose port `3000`
+     - Enable anonymous admin access for local dev (`GF_AUTH_ANONYMOUS_ENABLED=true`, `GF_AUTH_ANONYMOUS_ORG_ROLE=Admin`)
+     - Provision Tempo as a data source via `docker/telemetry/grafana/provisioning/datasources/tempo.yaml`
+
+- Create `docker/telemetry/otel-collector-config.yaml`:
+
+  ```yaml
+  receivers:
+    otlp:
+      protocols:
+        grpc:
+          endpoint: 0.0.0.0:4317
+        http:
+          endpoint: 0.0.0.0:4318
+
+  processors:
+    batch:
+      timeout: 1s
+      send_batch_size: 100
+
+  exporters:
+    debug:
+      verbosity: detailed
+    otlp/tempo:
+      endpoint: tempo:4317
+      tls:
+        insecure: true
+
+  service:
+    pipelines:
+      traces:
+        receivers: [otlp]
+        processors: [batch]
+        exporters: [debug, otlp/tempo]
+  ```
+
+- Create Grafana Tempo datasource provisioning file at `docker/telemetry/grafana/provisioning/datasources/tempo.yaml`:
+  ```yaml
+  apiVersion: 1
+  datasources:
+    - name: Tempo
+      type: tempo
+      access: proxy
+      url: http://tempo:3200
+  ```
+
+**Verification**: Run `docker compose -f docker/telemetry/docker-compose.yml up -d`, then:
+
+- `curl http://localhost:13133` returns healthy (Collector)
+- `http://localhost:3000` opens Grafana (Tempo datasource available, no traces yet)
+
+**Reference**:
+
+- [05-configuration-reference.md §5.5](./05-configuration-reference.md) — Collector config (dev YAML with Tempo exporter)
+- [05-configuration-reference.md §5.6](./05-configuration-reference.md) — Docker Compose development environment
+- [07-observability-backends.md §7.1](./07-observability-backends.md) — Tempo quick start and backend selection
+- [05-configuration-reference.md §5.8](./05-configuration-reference.md) — Grafana datasource provisioning and dashboards
+
+---
+
+## Task 1: Add OpenTelemetry C++ SDK Dependency
+
+**Objective**: Make `opentelemetry-cpp` available to the build system.
+
+**What to do**:
+
+- Edit `conanfile.py` to add `opentelemetry-cpp` as an **optional** dependency. The gRPC otel plugin flag (`"grpc/*:otel_plugin": False`) in the existing conanfile may need to remain false — we pull the OTel SDK separately.
+  - Add a Conan option: `with_telemetry = [True, False]` defaulting to `False`
+  - When `with_telemetry` is `True`, add `opentelemetry-cpp` to `self.requires()`
+  - Required OTel Conan components: `opentelemetry-cpp` (which bundles api, sdk, and exporters). If the package isn't in Conan Center, consider using `FetchContent` in CMake or building from source as a fallback.
+- Edit `CMakeLists.txt`:
+  - Add option: `option(XRPL_ENABLE_TELEMETRY "Enable OpenTelemetry tracing" OFF)`
+  - When ON, `find_package(opentelemetry-cpp CONFIG REQUIRED)` and add compile definition `XRPL_ENABLE_TELEMETRY`
+  - When OFF, do nothing (zero build impact)
+- Verify the build succeeds with `-DXRPL_ENABLE_TELEMETRY=OFF` (no regressions) and with `-DXRPL_ENABLE_TELEMETRY=ON` (SDK links successfully).
+
+**Key files**:
+
+- `conanfile.py`
+- `CMakeLists.txt`
+
+**Reference**:
+
+- [05-configuration-reference.md §5.4](./05-configuration-reference.md) — CMake integration, `FindOpenTelemetry.cmake`, `XRPL_ENABLE_TELEMETRY` option
+- [03-implementation-strategy.md §3.2](./03-implementation-strategy.md) — Key principle: zero-cost when disabled via compile-time flags
+- [02-design-decisions.md §2.1](./02-design-decisions.md) — SDK selection rationale and required OTel components
+
+---
+
+## Task 2: Create Core Telemetry Interface and NullTelemetry
+
+**Objective**: Define the `Telemetry` abstract interface and a no-op implementation so the rest of the codebase can reference telemetry without hard-depending on the OTel SDK.
+
+**What to do**:
+
+- Create `include/xrpl/telemetry/Telemetry.h`:
+  - Define `namespace xrpl::telemetry`
+  - Define `struct Telemetry::Setup` holding: `enabled`, `exporterEndpoint`, `samplingRatio`, `serviceName`, `serviceVersion`, `serviceInstanceId`, `traceRpc`, `traceTransactions`, `traceConsensus`, `tracePeer`
+  - Define abstract `class Telemetry` with:
+    - `virtual void start() = 0;`
+    - `virtual void stop() = 0;`
+    - `virtual bool isEnabled() const = 0;`
+    - `virtual nostd::shared_ptr<Tracer> getTracer(string_view name = "rippled") = 0;`
+    - `virtual nostd::shared_ptr<Span> startSpan(string_view name, SpanKind kind = kInternal) = 0;`
+    - `virtual nostd::shared_ptr<Span> startSpan(string_view name, Context const& parentContext, SpanKind kind = kInternal) = 0;`
+    - `virtual bool shouldTraceRpc() const = 0;`
+    - `virtual bool shouldTraceTransactions() const = 0;`
+    - `virtual bool shouldTraceConsensus() const = 0;`
+  - Factory: `std::unique_ptr<Telemetry> make_Telemetry(Setup const&, beast::Journal);`
+  - Config parser: `Telemetry::Setup setup_Telemetry(Section const&, std::string const& nodePublicKey, std::string const& version);`
+
+- Create `include/xrpl/telemetry/SpanGuard.h`:
+  - RAII guard that takes a `nostd::shared_ptr<Span>`, creates a `Scope`, and calls `span->End()` in its destructor.
+  - Convenience: `setAttribute()`, `setOk()`, `setStatus()`, `addEvent()`, `recordException()`, `context()`
+  - See [04-code-samples.md](./04-code-samples.md) §4.2 for the full implementation.
+
+- Create `src/libxrpl/telemetry/NullTelemetry.cpp`:
+  - Implements `Telemetry` with all no-ops.
+  - `isEnabled()` returns `false`, `startSpan()` returns a noop span.
+  - This is used when `XRPL_ENABLE_TELEMETRY` is OFF or `enabled=0` in config.
+
+- Guard all OTel SDK headers behind `#ifdef XRPL_ENABLE_TELEMETRY`. The `NullTelemetry` implementation should compile without the OTel SDK present.
+
+**Key new files**:
+
+- `include/xrpl/telemetry/Telemetry.h`
+- `include/xrpl/telemetry/SpanGuard.h`
+- `src/libxrpl/telemetry/NullTelemetry.cpp`
+
+**Reference**:
+
+- [04-code-samples.md §4.1](./04-code-samples.md) — Full `Telemetry` interface with `Setup` struct, lifecycle, tracer access, span creation, and component filtering methods
+- [04-code-samples.md §4.2](./04-code-samples.md) — Full `SpanGuard` RAII implementation and `NullSpanGuard` no-op class
+- [03-implementation-strategy.md §3.1](./03-implementation-strategy.md) — Directory structure: `include/xrpl/telemetry/` for headers, `src/libxrpl/telemetry/` for implementation
+- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation and zero-cost compile-time disabled pattern
+
+---
+
+## Task 3: Implement OTel-Backed Telemetry
+
+> **OTLP** = OpenTelemetry Protocol
+
+**Objective**: Implement the real `Telemetry` class that initializes the OTel SDK, configures the OTLP exporter and batch processor, and creates tracers/spans.
+
+**What to do**:
+
+- Create `src/libxrpl/telemetry/Telemetry.cpp` (compiled only when `XRPL_ENABLE_TELEMETRY=ON`):
+  - `class TelemetryImpl : public Telemetry` that:
+    - In `start()`: creates a `TracerProvider` with:
+      - Resource attributes: `service.name`, `service.version`, `service.instance.id`
+      - An `OtlpHttpExporter` pointed at `setup.exporterEndpoint` (default `localhost:4318`)
+      - A `BatchSpanProcessor` with configurable batch size and delay
+      - A `TraceIdRatioBasedSampler` using `setup.samplingRatio`
+    - Sets the global `TracerProvider`
+    - In `stop()`: calls `ForceFlush()` then shuts down the provider
+    - In `startSpan()`: delegates to `getTracer()->StartSpan(name, ...)`
+    - `shouldTraceRpc()` etc. read from `Setup` fields
+
+- Create `src/libxrpl/telemetry/TelemetryConfig.cpp`:
+  - `setup_Telemetry()` parses the `[telemetry]` config section from `xrpld.cfg`
+  - Maps config keys: `enabled`, `exporter`, `endpoint`, `sampling_ratio`, `trace_rpc`, `trace_transactions`, `trace_consensus`, `trace_peer`
+
+- Wire `make_Telemetry()` factory:
+  - If `setup.enabled` is true AND `XRPL_ENABLE_TELEMETRY` is defined: return `TelemetryImpl`
+  - Otherwise: return `NullTelemetry`
+
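+- Add telemetry source files to CMake. When `XRPL_ENABLE_TELEMETRY=ON`, additionally compile `Telemetry.cpp` and link against the OpenTelemetry API, SDK, and OTLP HTTP exporter (the exporter `TelemetryImpl` uses; with the Conan package these are all provided by the umbrella target `opentelemetry-cpp::opentelemetry-cpp` — see "POC Lessons Learned"). Compile `NullTelemetry.cpp` and `TelemetryConfig.cpp` in both modes: `make_Telemetry()` falls back to `NullTelemetry` when `enabled=0` even in a telemetry-enabled build, and `Application` calls `setup_Telemetry()` unconditionally.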
+
+**Key new files**:
+
+- `src/libxrpl/telemetry/Telemetry.cpp`
+- `src/libxrpl/telemetry/TelemetryConfig.cpp`
+
+**Key modified files**:
+
+- `CMakeLists.txt` (add telemetry library target)
+
+**Reference**:
+
+- [04-code-samples.md §4.1](./04-code-samples.md) — `Telemetry` interface that `TelemetryImpl` must implement
+- [05-configuration-reference.md §5.2](./05-configuration-reference.md) — `setup_Telemetry()` config parser implementation
+- [02-design-decisions.md §2.2](./02-design-decisions.md) — OTLP/gRPC exporter config (endpoint, TLS options)
+- [02-design-decisions.md §2.4.1](./02-design-decisions.md) — Resource attributes: `service.name`, `service.version`, `service.instance.id`, `xrpl.network.id`
+- [03-implementation-strategy.md §3.4](./03-implementation-strategy.md) — Per-operation CPU costs and overhead budget for span creation
+- [03-implementation-strategy.md §3.5](./03-implementation-strategy.md) — Memory overhead: static (~456 KB) and dynamic (~1.2 MB) budgets
+
+---
+
+## Task 4: Integrate Telemetry into Application Lifecycle
+
+**Objective**: Wire the `Telemetry` object into `Application` so all components can access it.
+
+**What to do**:
+
+- Edit `src/xrpld/app/main/Application.h`:
+  - Forward-declare `namespace xrpl::telemetry { class Telemetry; }`
+  - Add pure virtual method: `virtual telemetry::Telemetry& getTelemetry() = 0;`
+
+- Edit `src/xrpld/app/main/Application.cpp` (the `ApplicationImp` class):
+  - Add member: `std::unique_ptr<telemetry::Telemetry> telemetry_;`
+  - In the constructor, after config is loaded and node identity is known:
+    ```cpp
+    auto const telemetrySection = config_->section("telemetry");
+    auto telemetrySetup = telemetry::setup_Telemetry(
+        telemetrySection,
+        toBase58(TokenType::NodePublic, nodeIdentity_.publicKey()),
+        BuildInfo::getVersionString());
+    telemetry_ = telemetry::make_Telemetry(telemetrySetup, logs_->journal("Telemetry"));
+    ```
+  - In `start()`: call `telemetry_->start()` early
+  - In `stop()` or destructor: call `telemetry_->stop()` late (to flush pending spans)
+  - Implement `getTelemetry()` override: return `*telemetry_`
+
+- Add `[telemetry]` section to the example config `cfg/rippled-example.cfg`:
+  ```ini
+  # [telemetry]
+  # enabled=1
+  # endpoint=http://localhost:4318/v1/traces
+  # sampling_ratio=1.0
+  # trace_rpc=1
+  ```
+
+**Key modified files**:
+
+- `src/xrpld/app/main/Application.h`
+- `src/xrpld/app/main/Application.cpp`
+- `cfg/rippled-example.cfg` (or equivalent example config)
+
+**Reference**:
+
+- [05-configuration-reference.md §5.3](./05-configuration-reference.md) — `ApplicationImp` changes: member declaration, constructor init, `start()`/`stop()` wiring, `getTelemetry()` override
+- [05-configuration-reference.md §5.1](./05-configuration-reference.md) — `[telemetry]` config section format and all option defaults
+- [03-implementation-strategy.md §3.9.2](./03-implementation-strategy.md) — File impact assessment: `Application.cpp` ~15 lines added, ~3 changed (Low risk)
+
+---
+
+## Task 5: Create Instrumentation Macros
+
+**Objective**: Define convenience macros that make instrumenting code one-liners, and that compile to zero-cost no-ops when telemetry is disabled.
+
+**What to do**:
+
+- Create `src/xrpld/telemetry/TracingInstrumentation.h`:
+  - When `XRPL_ENABLE_TELEMETRY` is defined:
+
+    ```cpp
+    #define XRPL_TRACE_SPAN(telemetry, name) \
+        auto _xrpl_span_ = (telemetry).startSpan(name); \
+        ::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
+
+    #define XRPL_TRACE_RPC(telemetry, name) \
+        std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
+        if ((telemetry).shouldTraceRpc()) { \
+            _xrpl_guard_.emplace((telemetry).startSpan(name)); \
+        }
+
+    #define XRPL_TRACE_SET_ATTR(key, value) \
+        if (_xrpl_guard_.has_value()) { \
+            _xrpl_guard_->setAttribute(key, value); \
+        }
+
+    #define XRPL_TRACE_EXCEPTION(e) \
+        if (_xrpl_guard_.has_value()) { \
+            _xrpl_guard_->recordException(e); \
+        }
+    ```
+
+  - When `XRPL_ENABLE_TELEMETRY` is NOT defined, all macros expand to `((void)0)`
+
+**Key new file**:
+
+- `src/xrpld/telemetry/TracingInstrumentation.h`
+
+**Reference**:
+
+- [04-code-samples.md §4.3](./04-code-samples.md) — Full macro definitions for `XRPL_TRACE_SPAN`, `XRPL_TRACE_RPC`, `XRPL_TRACE_CONSENSUS`, `XRPL_TRACE_SET_ATTR`, `XRPL_TRACE_EXCEPTION` with both enabled and disabled branches
+- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation pattern: compile-time `#ifndef` and runtime `shouldTrace*()` checks
+- [03-implementation-strategy.md §3.9.7](./03-implementation-strategy.md) — Before/after code examples showing minimal intrusiveness (~1-3 lines per instrumentation point)
+
+---
+
+## Task 6: Instrument RPC ServerHandler
+
+> **WS** = WebSocket
+
+**Objective**: Add tracing to the HTTP RPC entry point so every incoming RPC request creates a span.
+
+**What to do**:
+
+- Edit `src/xrpld/rpc/detail/ServerHandler.cpp`:
+  - `#include` the `TracingInstrumentation.h` header
+  - In `ServerHandler::onRequest(Session& session)`:
+    - At the top of the method, add: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");`
+    - After the RPC command name is extracted, set attribute: `XRPL_TRACE_SET_ATTR("xrpl.rpc.command", command);`
+    - After the response status is known, set: `XRPL_TRACE_SET_ATTR("http.status_code", static_cast<int64_t>(statusCode));`
+    - Wrap error paths with: `XRPL_TRACE_EXCEPTION(e);`
+  - In `ServerHandler::processRequest(...)`:
+    - Add a child span: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.process");`
+    - Set method attribute: `XRPL_TRACE_SET_ATTR("xrpl.rpc.method", request_method);`
+  - In `ServerHandler::onWSMessage(...)` (WebSocket path):
+    - Add: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.ws.message");`
+
+- The goal is to see spans like:
+  ```
+  rpc.request
+    └── rpc.process
+  ```
+  in Tempo/Grafana for every HTTP RPC call.
+
+**Key modified file**:
+
+- `src/xrpld/rpc/detail/ServerHandler.cpp` (~15-25 lines added)
+
+**Reference**:
+
+- [04-code-samples.md §4.5.3](./04-code-samples.md) — Complete `ServerHandler::onRequest()` instrumented code sample with W3C header extraction, span creation, attribute setting, and error handling
+- [01-architecture-analysis.md §1.5](./01-architecture-analysis.md) — RPC request flow diagram: HTTP request -> attributes -> jobqueue.enqueue -> rpc.command -> response
+- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — Key trace points table: `rpc.request` in `ServerHandler.cpp::onRequest()` (Priority: High)
+- [02-design-decisions.md §2.3](./02-design-decisions.md) — Span naming convention: `rpc.request`, `rpc.command.*`
+- [02-design-decisions.md §2.4.2](./02-design-decisions.md) — RPC span attributes: `xrpl.rpc.command`, `xrpl.rpc.version`, `xrpl.rpc.role`, `xrpl.rpc.params`
+- [03-implementation-strategy.md §3.9.2](./03-implementation-strategy.md) — File impact: `ServerHandler.cpp` ~40 lines added, ~10 changed (Low risk)
+
+---
+
+## Task 7: Instrument RPC Command Execution
+
+**Objective**: Add per-command tracing inside the RPC handler so each command (e.g., `submit`, `account_info`, `server_info`) gets its own child span.
+
+**What to do**:
+
+- Edit `src/xrpld/rpc/detail/RPCHandler.cpp`:
+  - `#include` the `TracingInstrumentation.h` header
+  - In `doCommand(RPC::JsonContext& context, Json::Value& result)`:
+    - At the top: `XRPL_TRACE_RPC(context.app.getTelemetry(), "rpc.command." + context.method);`
+    - Set attributes:
+      - `XRPL_TRACE_SET_ATTR("xrpl.rpc.command", context.method);`
+      - `XRPL_TRACE_SET_ATTR("xrpl.rpc.version", static_cast<int64_t>(context.apiVersion));`
+      - `XRPL_TRACE_SET_ATTR("xrpl.rpc.role", (context.role == Role::ADMIN) ? "admin" : "user");`
+    - On success: `XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");`
+    - On error: `XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error");` and set the error message
+
+- After this, traces in Tempo/Grafana should look like:
+  ```
+  rpc.request  (xrpl.rpc.command=account_info)
+    └── rpc.process
+          └── rpc.command.account_info  (xrpl.rpc.version=2, xrpl.rpc.role=user, xrpl.rpc.status=success)
+  ```
+
+**Key modified file**:
+
+- `src/xrpld/rpc/detail/RPCHandler.cpp` (~15-20 lines added)
+
+**Reference**:
+
+- [04-code-samples.md §4.5.3](./04-code-samples.md) — `ServerHandler::onRequest()` code sample (includes child span pattern for `rpc.command.*`)
+- [02-design-decisions.md §2.3](./02-design-decisions.md) — Span naming: `rpc.command.*` pattern with dynamic command name (e.g., `rpc.command.server_info`)
+- [02-design-decisions.md §2.4.2](./02-design-decisions.md) — RPC attribute schema: `xrpl.rpc.command`, `xrpl.rpc.version`, `xrpl.rpc.role`, `xrpl.rpc.status`
+- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — Key trace points table: `rpc.command.*` in `RPCHandler.cpp::doCommand()` (Priority: High)
+- [02-design-decisions.md §2.6.5](./02-design-decisions.md) — Correlation with PerfLog: how `doCommand()` can link trace_id with existing PerfLog entries
+- [03-implementation-strategy.md §3.4.4](./03-implementation-strategy.md) — RPC request overhead budget: ~1.75 μs total per request
+
+---
+
+## Task 8: Build, Run, and Verify End-to-End
+
+> **OTLP** = OpenTelemetry Protocol
+
+**Objective**: Prove the full pipeline works: rippled emits traces -> OTel Collector receives them -> Tempo stores them for Grafana visualization.
+
+**What to do**:
+
+1. **Start the Docker stack**:
+
+   ```bash
+   docker compose -f docker/telemetry/docker-compose.yml up -d
+   ```
+
+   Verify Collector health: `curl http://localhost:13133`
+
+2. **Build rippled with telemetry**:
+
+   ```bash
+   # Adjust for your actual build workflow
+   conan install . --build=missing -o with_telemetry=True
+   cmake --preset default -DXRPL_ENABLE_TELEMETRY=ON
+   cmake --build --preset default
+   ```
+
+3. **Configure rippled**:
+   Add to `rippled.cfg` (or your local test config):
+
+   ```ini
+   [telemetry]
+   enabled=1
+   endpoint=http://localhost:4318/v1/traces
+   sampling_ratio=1.0
+   trace_rpc=1
+   ```
+
+4. **Start rippled** in standalone mode:
+
+   ```bash
+   ./rippled --conf rippled.cfg -a --start
+   ```
+
+5. **Generate RPC traffic**:
+
+   ```bash
+   # server_info
+   curl -s -X POST http://localhost:5005 \
+     -H "Content-Type: application/json" \
+     -d '{"method":"server_info","params":[{}]}'
+
+   # ledger
+   curl -s -X POST http://localhost:5005 \
+     -H "Content-Type: application/json" \
+     -d '{"method":"ledger","params":[{"ledger_index":"current"}]}'
+
+   # account_info (will error in standalone, that's fine — we trace errors too)
+   curl -s -X POST http://localhost:5005 \
+     -H "Content-Type: application/json" \
+     -d '{"method":"account_info","params":[{"account":"rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh"}]}'
+   ```
+
+6. **Verify in Grafana (Tempo)**:
+   - Open `http://localhost:3000`
+   - Navigate to Explore → select Tempo datasource
+   - Search for service `rippled`
+   - Confirm you see traces with spans: `rpc.request` -> `rpc.process` -> `rpc.command.server_info`
+   - Click into a trace and verify attributes: `xrpl.rpc.command`, `xrpl.rpc.status`, `xrpl.rpc.version`
+
+7. **Verify zero-overhead when disabled**:
+   - Rebuild with `XRPL_ENABLE_TELEMETRY=OFF`, or set `enabled=0` in config
+   - Run the same RPC calls
+   - Confirm no new traces appear and no errors in rippled logs
+
+**Verification Checklist**:
+
+- [ ] Docker stack starts without errors
+- [ ] rippled builds with `-DXRPL_ENABLE_TELEMETRY=ON`
+- [ ] rippled starts and connects to OTel Collector (check rippled logs for telemetry messages)
+- [ ] Traces appear in Grafana/Tempo under service "rippled"
+- [ ] Span hierarchy is correct (parent-child relationships)
+- [ ] Span attributes are populated (`xrpl.rpc.command`, `xrpl.rpc.status`, etc.)
+- [ ] Error spans show error status and message
+- [ ] Building with `XRPL_ENABLE_TELEMETRY=OFF` produces no regressions
+- [ ] Setting `enabled=0` at runtime produces no traces and no errors
+
+**Reference**:
+
+- [06-implementation-phases.md §6.11.1](./06-implementation-phases.md) — Phase 1 definition of done: SDK compiles, runtime toggle works, span creation verified in Tempo, config validation passes
+- [06-implementation-phases.md §6.11.2](./06-implementation-phases.md#6112-phase-2-rpc-tracing) — Phase 2 definition of done: 100% RPC coverage, traceparent propagation, <1ms p99 overhead, dashboard deployed
+- [06-implementation-phases.md §6.8](./06-implementation-phases.md) — Success metrics: trace coverage >95%, CPU overhead <3%, memory <5 MB, latency impact <2%
+- [03-implementation-strategy.md §3.9.5](./03-implementation-strategy.md) — Backward compatibility: config optional, protocol unchanged, `XRPL_ENABLE_TELEMETRY=OFF` produces identical binary
+- [01-architecture-analysis.md §1.8](./01-architecture-analysis.md) — Observable outcomes: what traces, metrics, and dashboards to expect
+
+---
+
+## Task 9: Document POC Results and Next Steps
+
+> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
+
+**Objective**: Capture findings, screenshots, and remaining work for the team.
+
+**What to do**:
+
+- Take screenshots of Grafana/Tempo showing:
+  - The service list with "rippled"
+  - A trace with the full span tree
+  - Span detail view showing attributes
+- Document any issues encountered (build issues, SDK quirks, missing attributes)
+- Note performance observations (build time impact, any noticeable runtime overhead)
+- Write a short summary of what the POC proves and what it doesn't cover yet:
+  - **Proves**: OTel SDK integrates with rippled, OTLP export works, RPC traces visible
+  - **Doesn't cover**: Cross-node P2P context propagation, consensus tracing, protobuf trace context, W3C traceparent header extraction, tail-based sampling, production deployment
+- Outline next steps (mapping to the full plan phases):
+  - [Phase 2](./06-implementation-phases.md) completion: [W3C header extraction](./02-design-decisions.md) (§2.5), WebSocket tracing, all [RPC handlers](./01-architecture-analysis.md) (§1.6)
+  - [Phase 3](./06-implementation-phases.md): [Protobuf `TraceContext` message](./04-code-samples.md) (§4.4), [transaction relay tracing](./04-code-samples.md) (§4.5.1) across nodes
+  - [Phase 4](./06-implementation-phases.md): [Consensus round and phase tracing](./04-code-samples.md) (§4.5.2)
+  - [Phase 5](./06-implementation-phases.md): [Production collector config](./05-configuration-reference.md) (§5.5.2), [Grafana dashboards](./07-observability-backends.md) (§7.6), [alerting](./07-observability-backends.md) (§7.6.3)
+
+**Reference**:
+
+- [06-implementation-phases.md §6.1](./06-implementation-phases.md) — Full 5-phase timeline overview and Gantt chart
+- [06-implementation-phases.md §6.10](./06-implementation-phases.md) — Crawl-Walk-Run strategy: POC is the CRAWL phase, next steps are WALK and RUN
+- [06-implementation-phases.md §6.12](./06-implementation-phases.md) — Recommended implementation order (14 steps across 9 weeks)
+- [03-implementation-strategy.md §3.9](./03-implementation-strategy.md) — Code intrusiveness assessment and risk matrix for each remaining component
+- [07-observability-backends.md §7.2](./07-observability-backends.md) — Production backend selection (Tempo, Elastic APM, Honeycomb, Datadog)
+- [02-design-decisions.md §2.5](./02-design-decisions.md) — Context propagation design: W3C HTTP headers, protobuf P2P, JobQueue internal
+- [00-tracing-fundamentals.md](./00-tracing-fundamentals.md) — Reference for team onboarding on distributed tracing concepts
+
+---
+
+## Summary
+
+| Task | Description                          | New Files | Modified Files | Depends On |
+| ---- | ------------------------------------ | --------- | -------------- | ---------- |
+| 0    | Docker observability stack           | 4         | 0              | —          |
+| 1    | OTel C++ SDK dependency              | 0         | 2              | —          |
+| 2    | Core Telemetry interface + NullImpl  | 3         | 0              | 1          |
+| 3    | OTel-backed Telemetry implementation | 2         | 1              | 1, 2       |
+| 4    | Application lifecycle integration    | 0         | 3              | 2, 3       |
+| 5    | Instrumentation macros               | 1         | 0              | 2          |
+| 6    | Instrument RPC ServerHandler         | 0         | 1              | 4, 5       |
+| 7    | Instrument RPC command execution     | 0         | 1              | 4, 5       |
+| 8    | End-to-end verification              | 0         | 0              | 0-7        |
+| 9    | Document results and next steps      | 1         | 0              | 8          |
+
+**Parallel work**: Tasks 0 and 1 can run in parallel. Tasks 3 and 5 both build on Task 2 but have no dependency on each other, so they can proceed in parallel once Task 2 is done. Tasks 6 and 7 can be done in parallel once Tasks 4 and 5 are complete.
+
+---
+
+## Next Steps (Post-POC)
+
+> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
+
+### Metrics Pipeline for Grafana Dashboards
+
+The current POC exports **traces only**. Grafana's Explore view can query Tempo for individual traces, but time-series charts (latency histograms, request throughput, error rates) require a **metrics pipeline**. To enable this:
+
+1. **Add a `spanmetrics` connector** to the OTel Collector config that derives RED metrics (Rate, Errors, Duration) from trace spans automatically:
+
+   ```yaml
+   connectors:
+     spanmetrics:
+       histogram:
+         explicit:
+           buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 5s]
+       dimensions:
+         - name: xrpl.rpc.command
+         - name: xrpl.rpc.status
+
+   exporters:
+     prometheus:
+       endpoint: 0.0.0.0:8889
+
+   service:
+     pipelines:
+       traces:
+         receivers: [otlp]
+         processors: [batch]
+         exporters: [debug, otlp/tempo, spanmetrics]
+       metrics:
+         receivers: [spanmetrics]
+         exporters: [prometheus]
+   ```
+
+2. **Add Prometheus** to the Docker Compose stack to scrape the collector's metrics endpoint.
+
+3. **Add Prometheus as a Grafana datasource** and build dashboards for:
+   - RPC request latency (p50/p95/p99) by command
+   - RPC throughput (requests/sec) by command
+   - Error rate by command
+   - Span duration distribution
+
+### Additional Instrumentation
+
+- **W3C `traceparent` header extraction** in `ServerHandler` to support cross-service context propagation from external callers
+- **WebSocket RPC tracing** in `ServerHandler::onWSMessage()`
+- **Transaction relay tracing** across nodes using protobuf `TraceContext` messages
+- **Consensus round and phase tracing** for validator coordination visibility
+- **Ledger close tracing** to measure close-to-validated latency
+
+### Production Hardening
+
+- **Tail-based sampling** in the OTel Collector to reduce volume while retaining error/slow traces
+- **TLS configuration** for the OTLP exporter in production deployments
+- **Resource limits** on the batch processor queue to prevent unbounded memory growth
+- **Health monitoring** for the telemetry pipeline itself (collector lag, export failures)
+
+### POC Lessons Learned
+
+Issues encountered during POC implementation that inform future work:
+
+| Issue                                                                                              | Resolution                                                                    | Impact on Future Work                                            |
+| -------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ---------------------------------------------------------------- |
+| Conan lockfile rejected `opentelemetry-cpp/1.18.0`                                                 | Used `--lockfile=""` to bypass                                                | Lockfile must be regenerated when adding new dependencies        |
+| Conan package only builds OTLP HTTP exporter, not gRPC                                             | Switched from gRPC to HTTP exporter (`localhost:4318/v1/traces`)              | HTTP exporter is the default; gRPC requires custom Conan profile |
+| CMake target `opentelemetry-cpp::api` etc. don't exist in Conan package                            | Use umbrella target `opentelemetry-cpp::opentelemetry-cpp`                    | Conan targets differ from upstream CMake targets                 |
+| OTel Collector `logging` exporter deprecated                                                       | Renamed to `debug` exporter                                                   | Use `debug` in all collector configs going forward               |
+| Macro parameter `telemetry` collided with `::xrpl::telemetry::` namespace                          | Renamed macro params to `_tel_obj_`, `_span_name_`                            | Avoid common words as macro parameter names                      |
+| `opentelemetry::trace::Scope` creates new context on move                                          | Store scope as member, create once in constructor                             | SpanGuard move semantics need care with Scope lifecycle          |
+| `TracerProviderFactory::Create` returns `unique_ptr<sdk::TracerProvider>`, not `nostd::shared_ptr` | Use `std::shared_ptr` member, wrap in `nostd::shared_ptr` for global provider | OTel SDK factory return types don't match API provider types     |
--- a/OpenTelemetryPlan/Phase2_taskList.md
+++ b/OpenTelemetryPlan/Phase2_taskList.md
@@ -0,0 +1,187 @@
+# Phase 2: RPC Tracing Completion Task List
+
+> **Goal**: Complete full RPC tracing coverage with W3C Trace Context propagation, unit tests, and performance validation. Build on the POC foundation to achieve production-quality RPC observability.
+>
+> **Scope**: W3C header extraction, TraceContext propagation utilities, unit tests for core telemetry, integration tests for RPC tracing, and performance benchmarks.
+>
+> **Branch**: `pratik/otel-phase2-rpc-tracing` (from `pratik/OpenTelemetry_and_DistributedTracing_planning`)
+
+### Related Plan Documents
+
+| Document                                                     | Relevance                                                     |
+| ------------------------------------------------------------ | ------------------------------------------------------------- |
+| [04-code-samples.md](./04-code-samples.md)                   | TraceContextPropagator (§4.4.2), RPC instrumentation (§4.5.3) |
+| [02-design-decisions.md](./02-design-decisions.md)           | W3C Trace Context (§2.5), span attributes (§2.4.2)            |
+| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 2 tasks (§6.3), definition of done (§6.11.2)            |
+
+---
+
+## Task 2.1: Implement W3C Trace Context HTTP Header Extraction
+
+**Objective**: Extract `traceparent` and `tracestate` headers from incoming HTTP RPC requests so external callers can propagate their trace context into rippled.
+
+**What to do**:
+
+- Create `include/xrpl/telemetry/TraceContextPropagator.h`:
+  - `extractFromHeaders(headerGetter)` - extract W3C traceparent/tracestate from HTTP headers
+  - `injectToHeaders(ctx, headerSetter)` - inject trace context into response headers
+  - Use OTel's `TextMapPropagator` with `W3CTraceContextPropagator` for standards compliance
+  - Only compiled when `XRPL_ENABLE_TELEMETRY` is defined
+
+- Create `src/libxrpl/telemetry/TraceContextPropagator.cpp`:
+  - Implement a simple `TextMapCarrier` adapter for HTTP headers
+  - Use `opentelemetry::context::propagation::GlobalTextMapPropagator` for extraction/injection
+  - Register the W3C propagator in `TelemetryImpl::start()`
+
+- Modify `src/xrpld/rpc/detail/ServerHandler.cpp`:
+  - In the HTTP request handler, extract parent context from headers before creating span
+  - Pass extracted context to `startSpan()` as parent
+  - Inject trace context into response headers
+
+**Key new files**:
+
+- `include/xrpl/telemetry/TraceContextPropagator.h`
+- `src/libxrpl/telemetry/TraceContextPropagator.cpp`
+
+**Key modified files**:
+
+- `src/xrpld/rpc/detail/ServerHandler.cpp`
+- `src/libxrpl/telemetry/Telemetry.cpp` (register W3C propagator)
+
+**Reference**:
+
+- [04-code-samples.md §4.4.2](./04-code-samples.md) — TraceContextPropagator with extractFromHeaders/injectToHeaders
+- [02-design-decisions.md §2.5](./02-design-decisions.md) — W3C Trace Context propagation design
+
+---
+
+## Task 2.2: Add XRPL_TRACE_PEER Macro
+
+**Objective**: Add the missing peer-tracing macro (`XRPL_TRACE_PEER`) for future Phase 3 use, plus a ledger-tracing macro (`XRPL_TRACE_LEDGER`), so that the set of tracing macros is complete.
+
+**What to do**:
+
+- Edit `src/xrpld/telemetry/TracingInstrumentation.h`:
+  - Add `XRPL_TRACE_PEER(_tel_obj_, _span_name_)` macro that checks `shouldTracePeer()`
+  - Add `XRPL_TRACE_LEDGER(_tel_obj_, _span_name_)` macro (for future ledger tracing)
+  - Ensure disabled variants expand to `((void)0)`
+
+**Key modified file**:
+
+- `src/xrpld/telemetry/TracingInstrumentation.h`
+
+---
+
+## Task 2.3: Add shouldTraceLedger() to Telemetry Interface
+
+**Objective**: The `Setup` struct has a `traceLedger` field but there's no corresponding virtual method. Add it for interface completeness.
+
+**What to do**:
+
+- Edit `include/xrpl/telemetry/Telemetry.h`:
+  - Add `virtual bool shouldTraceLedger() const = 0;`
+
+- Update all implementations:
+  - `src/libxrpl/telemetry/Telemetry.cpp` (TelemetryImpl, NullTelemetryOtel)
+  - `src/libxrpl/telemetry/NullTelemetry.cpp` (NullTelemetry)
+
+**Key modified files**:
+
+- `include/xrpl/telemetry/Telemetry.h`
+- `src/libxrpl/telemetry/Telemetry.cpp`
+- `src/libxrpl/telemetry/NullTelemetry.cpp`
+
+---
+
+## Task 2.4: Unit Tests for Core Telemetry Infrastructure
+
+**Objective**: Add unit tests for the core telemetry abstractions to validate correctness and catch regressions.
+
+**What to do**:
+
+- Create `src/test/telemetry/Telemetry_test.cpp`:
+  - Test NullTelemetry: verify all methods return expected no-op values
+  - Test Setup defaults: verify all Setup fields have correct defaults
+  - Test setup_Telemetry config parser: verify parsing of [telemetry] section
+  - Test enabled/disabled factory paths
+  - Test shouldTrace\* methods respect config flags
+
+- Create `src/test/telemetry/SpanGuard_test.cpp`:
+  - Test SpanGuard RAII lifecycle (span ends on destruction)
+  - Test move constructor works correctly
+  - Test setAttribute, setOk, setStatus, addEvent, recordException
+  - Test context() returns valid context
+
+- Add test files to CMake build
+
+**Key new files**:
+
+- `src/test/telemetry/Telemetry_test.cpp`
+- `src/test/telemetry/SpanGuard_test.cpp`
+
+**Reference**:
+
+- [06-implementation-phases.md §6.11.1](./06-implementation-phases.md) — Phase 1 exit criteria (unit tests passing)
+
+---
+
+## Task 2.5: Enhance RPC Span Attributes
+
+**Objective**: Add additional attributes to RPC spans per the semantic conventions defined in the plan.
+
+**What to do**:
+
+- Edit `src/xrpld/rpc/detail/ServerHandler.cpp`:
+  - Add `http.method` attribute for HTTP requests
+  - Add `http.status_code` attribute for responses
+  - Add `net.peer.ip` attribute for client IP (if available)
+
+- Edit `src/xrpld/rpc/detail/RPCHandler.cpp`:
+  - Add `xrpl.rpc.duration_ms` attribute on completion
+  - Add error message attribute on failure: `xrpl.rpc.error_message`
+
+**Key modified files**:
+
+- `src/xrpld/rpc/detail/ServerHandler.cpp`
+- `src/xrpld/rpc/detail/RPCHandler.cpp`
+
+**Reference**:
+
+- [02-design-decisions.md §2.4.2](./02-design-decisions.md) — RPC attribute schema
+
+---
+
+## Task 2.6: Build Verification and Performance Baseline
+
+**Objective**: Verify the build succeeds with and without telemetry, and establish a performance baseline.
+
+**What to do**:
+
+1. Build with `telemetry=ON` and verify no compilation errors
+2. Build with `telemetry=OFF` and verify no regressions
+3. Run existing unit tests to verify no breakage
+4. Document any build issues in lessons.md
+
+**Verification Checklist**:
+
+- [ ] `conan install . --build=missing -o telemetry=True` succeeds
+- [ ] `cmake --preset default -Dtelemetry=ON` configures correctly
+- [ ] Build succeeds with telemetry ON
+- [ ] Build succeeds with telemetry OFF
+- [ ] Existing tests pass with telemetry ON
+- [ ] Existing tests pass with telemetry OFF
+
+---
+
+## Summary
+
+| Task | Description                                 | New Files | Modified Files | Depends On |
+| ---- | ------------------------------------------- | --------- | -------------- | ---------- |
+| 2.1  | W3C Trace Context header extraction         | 2         | 2              | POC        |
+| 2.2  | Add XRPL_TRACE_PEER/LEDGER macros           | 0         | 1              | POC        |
+| 2.3  | Add shouldTraceLedger() interface method    | 0         | 3              | POC        |
+| 2.4  | Unit tests for core telemetry               | 2         | 1              | POC, 2.3   |
+| 2.5  | Enhanced RPC span attributes                | 0         | 2              | POC        |
+| 2.6  | Build verification and performance baseline | 0         | 0              | 2.1-2.5    |
+
+**Parallel work**: Tasks 2.1, 2.2, 2.3 can run in parallel. Task 2.4 depends on 2.3. Task 2.5 can run in parallel with 2.4. Task 2.6 depends on all others.
--- a/OpenTelemetryPlan/Phase3_taskList.md
+++ b/OpenTelemetryPlan/Phase3_taskList.md
@@ -0,0 +1,238 @@
+# Phase 3: Transaction Tracing Task List
+
+> **Goal**: Trace the full transaction lifecycle from RPC submission through peer relay, including cross-node context propagation via Protocol Buffer extensions. This is the WALK phase that demonstrates true distributed tracing.
+>
+> **Scope**: Protocol Buffer `TraceContext` message, context serialization, PeerImp transaction instrumentation, NetworkOPs processing instrumentation, HashRouter visibility, and multi-node relay context propagation.
+>
+> **Branch**: `pratik/otel-phase3-tx-tracing` (from `pratik/otel-phase2-rpc-tracing`)
+
+### Related Plan Documents
+
+| Document                                                     | Relevance                                                                                        |
+| ------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ |
+| [04-code-samples.md](./04-code-samples.md)                   | TraceContext protobuf (§4.4.1), PeerImp instrumentation (§4.5.1), context serialization (§4.4.2) |
+| [01-architecture-analysis.md](./01-architecture-analysis.md) | Transaction flow (§1.3), key trace points (§1.6)                                                 |
+| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 3 tasks (§6.4), definition of done (§6.11.3)                                               |
+| [02-design-decisions.md](./02-design-decisions.md)           | Context propagation design (§2.5), attribute schema (§2.4.3)                                     |
+
+---
+
+## Task 3.1: Define TraceContext Protocol Buffer Message
+
+**Objective**: Add trace context fields to the P2P protocol messages so trace IDs can propagate across nodes.
+
+**What to do**:
+
+- Edit `include/xrpl/proto/xrpl.proto` (or `src/ripple/proto/ripple.proto`, wherever the proto is):
+  - Add `TraceContext` message definition:
+    ```protobuf
+    message TraceContext {
+        bytes trace_id = 1;      // 16-byte trace identifier
+        bytes span_id = 2;       // 8-byte span identifier
+        uint32 trace_flags = 3;  // bit 0 = sampled
+        string trace_state = 4;  // W3C tracestate value
+    }
+    ```
+  - Add `optional TraceContext trace_context = 1001;` to:
+    - `TMTransaction`
+    - `TMProposeSet` (for Phase 4 use)
+    - `TMValidation` (for Phase 4 use)
+  - Use high field numbers (1001+) to avoid conflicts with existing fields
+
+- Regenerate protobuf C++ code
+
+**Key modified files**:
+
+- `include/xrpl/proto/xrpl.proto` (or equivalent)
+
+**Reference**:
+
+- [04-code-samples.md §4.4.1](./04-code-samples.md) — TraceContext message definition
+- [02-design-decisions.md §2.5.2](./02-design-decisions.md) — Protocol buffer context propagation design
+
+---
+
+## Task 3.2: Implement Protobuf Context Serialization
+
+**Objective**: Create utilities to serialize/deserialize OTel trace context to/from protobuf `TraceContext` messages.
+
+**What to do**:
+
+- Create `include/xrpl/telemetry/TraceContextPropagator.h`, or extend it if it already exists from Phase 2:
+  - Add protobuf-specific methods:
+    - `static Context extractFromProtobuf(protocol::TraceContext const& proto)` — reconstruct OTel context from protobuf fields
+    - `static void injectToProtobuf(Context const& ctx, protocol::TraceContext& proto)` — serialize current span context into protobuf fields
+  - Both methods are guarded by `#ifdef XRPL_ENABLE_TELEMETRY`
+
+- Create/extend `src/libxrpl/telemetry/TraceContextPropagator.cpp`:
+  - Implement extraction: read trace_id (16 bytes), span_id (8 bytes), trace_flags from protobuf, construct `SpanContext`, wrap in `Context`
+  - Implement injection: get current span from context, serialize its TraceId, SpanId, and TraceFlags into protobuf fields
+
+**Key new/modified files**:
+
+- `include/xrpl/telemetry/TraceContextPropagator.h`
+- `src/libxrpl/telemetry/TraceContextPropagator.cpp`
+
+**Reference**:
+
+- [04-code-samples.md §4.4.2](./04-code-samples.md) — Full extract/inject implementation
+
+---
+
+## Task 3.3: Instrument PeerImp Transaction Handling
+
+**Objective**: Add trace spans to the peer-level transaction receive and relay path.
+
+**What to do**:
+
+- Edit `src/xrpld/overlay/detail/PeerImp.cpp`:
+  - In `onMessage(TMTransaction)` / `handleTransaction()`:
+    - Extract parent trace context from incoming `TMTransaction::trace_context` field (if present)
+    - Create `tx.receive` span as child of extracted context (or new root if none)
+    - Set attributes: `xrpl.tx.hash`, `xrpl.peer.id`, `xrpl.tx.status`
+    - On HashRouter suppression (duplicate): set `xrpl.tx.suppressed=true`, add `tx.duplicate` event
+    - Wrap validation call with child span `tx.validate`
+    - Wrap relay with `tx.relay` span
+  - When relaying to peers:
+    - Inject current trace context into outgoing `TMTransaction::trace_context`
+    - Set `xrpl.tx.relay_count` attribute
+
+- Include `TracingInstrumentation.h` and use `XRPL_TRACE_TX` macro
+
+**Key modified files**:
+
+- `src/xrpld/overlay/detail/PeerImp.cpp`
+
+**Reference**:
+
+- [04-code-samples.md §4.5.1](./04-code-samples.md) — Full PeerImp instrumentation example
+- [01-architecture-analysis.md §1.3](./01-architecture-analysis.md) — Transaction flow diagram
+- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — tx.receive trace point
+
+---
+
+## Task 3.4: Instrument NetworkOPs Transaction Processing
+
+**Objective**: Trace the transaction processing pipeline in NetworkOPs, covering both sync and async paths.
+
+**What to do**:
+
+- Edit `src/xrpld/app/misc/NetworkOPs.cpp`:
+  - In `processTransaction()`:
+    - Create `tx.process` span
+    - Set attributes: `xrpl.tx.hash`, `xrpl.tx.type`, `xrpl.tx.local` (whether from RPC or peer)
+    - Record whether sync or async path is taken
+
+  - In `doTransactionAsync()`:
+    - Capture parent context before queuing
+    - Create `tx.queue` span with queue depth attribute
+    - Add event when transaction is dequeued for processing
+
+  - In `doTransactionSync()`:
+    - Create `tx.process_sync` span
+    - Record result (applied, queued, rejected)
+
+**Key modified files**:
+
+- `src/xrpld/app/misc/NetworkOPs.cpp`
+
+**Reference**:
+
+- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — tx.validate and tx.process trace points
+- [02-design-decisions.md §2.4.3](./02-design-decisions.md) — Transaction attribute schema
+
+---
+
+## Task 3.5: Instrument HashRouter for Dedup Visibility
+
+**Objective**: Make transaction deduplication visible in traces by recording HashRouter decisions as span attributes/events.
+
+**What to do**:
+
+- Edit `src/xrpld/overlay/detail/PeerImp.cpp` (in handleTransaction):
+  - After calling `HashRouter::shouldProcess()` or `addSuppressionPeer()`:
+    - Record `xrpl.tx.suppressed` attribute (true/false)
+    - Record `xrpl.tx.flags` showing current HashRouter state (SAVED, TRUSTED, etc.)
+    - Add `tx.first_seen` or `tx.duplicate` event
+
+- This is NOT a modification to HashRouter itself — just recording its decisions as span attributes in the existing PeerImp instrumentation from Task 3.3.
+
+**Key modified files**:
+
+- `src/xrpld/overlay/detail/PeerImp.cpp` (same file as Task 3.3; listed separately for logical grouping)
+
+---
+
+## Task 3.6: Context Propagation in Transaction Relay
+
+**Objective**: Ensure trace context flows correctly when transactions are relayed between peers, creating linked spans across nodes.
+
+**What to do**:
+
+- Verify the relay path injects trace context:
+  - When `PeerImp` relays a transaction, the `TMTransaction` message should carry `trace_context`
+  - When a remote peer receives it, the context is extracted and used as parent
+
+- Test context propagation:
+  - Manually verify with 2+ node setup that trace IDs match across nodes
+  - Confirm parent-child span relationships are correct in Jaeger
+
+- Handle edge cases:
+  - Missing trace context (older peers): create new root span
+  - Corrupted trace context: log warning, create new root span
+  - Sampled-out traces: respect trace flags
+
+**Key modified files**:
+
+- `src/xrpld/overlay/detail/PeerImp.cpp`
+- `src/xrpld/overlay/detail/OverlayImpl.cpp` (if relay method needs context param)
+
+**Reference**:
+
+- [02-design-decisions.md §2.5](./02-design-decisions.md) — Context propagation design
+- [04-code-samples.md §4.5.1](./04-code-samples.md) — Relay context injection pattern
+
+---
+
+## Task 3.7: Build Verification and Testing
+
+**Objective**: Verify all Phase 3 changes compile and work correctly.
+
+**What to do**:
+
+1. Build with `telemetry=ON` — verify no compilation errors
+2. Build with `telemetry=OFF` — verify no regressions
+3. Run existing unit tests
+4. Verify protobuf regeneration produces correct C++ code
+5. Document any issues encountered
+
+**Verification Checklist**:
+
+- [ ] Protobuf changes generate valid C++
+- [ ] Build succeeds with telemetry ON
+- [ ] Build succeeds with telemetry OFF
+- [ ] Existing tests pass
+- [ ] No undefined symbols from new telemetry calls
+
+---
+
+## Summary
+
+| Task | Description                         | New Files | Modified Files | Depends On |
+| ---- | ----------------------------------- | --------- | -------------- | ---------- |
+| 3.1  | TraceContext protobuf message       | 0         | 1              | Phase 2    |
+| 3.2  | Protobuf context serialization      | 1-2       | 0              | 3.1        |
+| 3.3  | PeerImp transaction instrumentation | 0         | 1              | 3.2        |
+| 3.4  | NetworkOPs transaction processing   | 0         | 1              | Phase 2    |
+| 3.5  | HashRouter dedup visibility         | 0         | 1              | 3.3        |
+| 3.6  | Relay context propagation           | 0         | 1-2            | 3.3, 3.5   |
+| 3.7  | Build verification and testing      | 0         | 0              | 3.1-3.6    |
+
+**Parallel work**: Tasks 3.1 and 3.4 can start in parallel. Task 3.2 depends on 3.1. Task 3.3 depends on 3.2. Task 3.5 depends on 3.3. Task 3.6 depends on 3.3 and 3.5. Task 3.7 depends on all others.
+
+**Exit Criteria** (from [06-implementation-phases.md §6.11.3](./06-implementation-phases.md)):
+
+- [ ] Transaction traces span across nodes
+- [ ] Trace context in Protocol Buffer messages
+- [ ] HashRouter deduplication visible in traces
+- [ ] <5% overhead on transaction throughput
--- a/OpenTelemetryPlan/Phase4_taskList.md
+++ b/OpenTelemetryPlan/Phase4_taskList.md
@@ -0,0 +1,837 @@
+# Phase 4: Consensus Tracing Task List
+
+> **Goal**: Full observability into consensus rounds — track round lifecycle, phase transitions, proposal handling, and validation. This is the RUN phase that completes the distributed tracing story.
+>
+> **Scope**: RCLConsensus instrumentation for round starts, phase transitions (open/establish/accept), proposal send/receive, validation handling, and correlation with transaction traces from Phase 3.
+>
+> **Branch**: `pratik/otel-phase4-consensus-tracing` (from `pratik/otel-phase3-tx-tracing`)
+
+### Related Plan Documents
+
+| Document                                                     | Relevance                                                   |
+| ------------------------------------------------------------ | ----------------------------------------------------------- |
+| [04-code-samples.md](./04-code-samples.md)                   | Consensus instrumentation (§4.5.2), consensus span patterns |
+| [01-architecture-analysis.md](./01-architecture-analysis.md) | Consensus round flow (§1.4), key trace points (§1.6)        |
+| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 4 tasks (§6.5), definition of done (§6.11.4)          |
+| [02-design-decisions.md](./02-design-decisions.md)           | Consensus attribute schema (§2.4.4)                         |
+
+---
+
+## Task 4.1: Instrument Consensus Round Start
+
+**Objective**: Create a root span for each consensus round that captures the round's key parameters.
+
+**What to do**:
+
+- Edit `src/xrpld/app/consensus/RCLConsensus.cpp`:
+  - In `RCLConsensus::startRound()` (or the Adaptor's startRound):
+    - Create `consensus.round` span using `XRPL_TRACE_CONSENSUS` macro
+    - Set attributes:
+      - `xrpl.consensus.ledger.prev` — previous ledger hash
+      - `xrpl.consensus.ledger.seq` — target ledger sequence
+      - `xrpl.consensus.proposers` — number of trusted proposers
+      - `xrpl.consensus.mode` — "proposing" or "observing"
+    - Store the span context for use by child spans in phase transitions
+
+- Add a member to hold current round trace context:
+  - `opentelemetry::context::Context currentRoundContext_` (guarded by `#ifdef XRPL_ENABLE_TELEMETRY`)
+  - Updated at round start, used by phase transition spans
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+- `src/xrpld/app/consensus/RCLConsensus.h` (add context member)
+
+**Reference**:
+
+- [04-code-samples.md §4.5.2](./04-code-samples.md) — startRound instrumentation example
+- [01-architecture-analysis.md §1.4](./01-architecture-analysis.md) — Consensus round flow
+
+---
+
+## Task 4.2: Instrument Phase Transitions
+
+**Objective**: Create child spans for each consensus phase (open, establish, accept) to show timing breakdown.
+
+**What to do**:
+
+- Edit `src/xrpld/app/consensus/RCLConsensus.cpp`:
+  - Identify where phase transitions occur (the `Consensus<Adaptor>` template drives this)
+  - For each phase entry:
+    - Create span as child of `currentRoundContext_`: `consensus.phase.open`, `consensus.phase.establish`, `consensus.phase.accept`
+    - Set `xrpl.consensus.phase` attribute
+    - Add `phase.enter` event at start, `phase.exit` event at end
+    - Record phase duration in milliseconds
+
+  - In the `onClose` adaptor method:
+    - Create `consensus.ledger_close` span
+    - Set attributes: close_time, mode, transaction count in initial position
+
+  - Note: The Consensus template class in `src/xrpld/consensus/Consensus.h` drives phase transitions — Phase 4a instruments directly in the template
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+- Possibly `src/xrpld/consensus/Consensus.h` (for template-level phase tracking)
+
+**Reference**:
+
+- [04-code-samples.md §4.5.2](./04-code-samples.md) — phaseTransition instrumentation
+
+---
+
+## Task 4.3: Instrument Proposal Handling
+
+**Objective**: Trace proposal send and receive to show validator coordination.
+
+**What to do**:
+
+- Edit `src/xrpld/app/consensus/RCLConsensus.cpp`:
+  - In `Adaptor::propose()`:
+    - Create `consensus.proposal.send` span
+    - Set attributes: `xrpl.consensus.round` (proposal sequence), proposal hash
+    - Inject trace context into outgoing `TMProposeSet::trace_context` (from Phase 3 protobuf)
+
+  - In `Adaptor::peerProposal()` (or wherever peer proposals are received):
+    - Extract trace context from incoming `TMProposeSet::trace_context`
+    - Create `consensus.proposal.receive` span as child of extracted context
+    - Set attributes: `xrpl.consensus.proposer` (node ID), `xrpl.consensus.round`
+
+  - In `Adaptor::share(RCLCxPeerPos)`:
+    - Create `consensus.proposal.relay` span for relaying peer proposals
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+
+**Reference**:
+
+- [04-code-samples.md §4.5.2](./04-code-samples.md) — peerProposal instrumentation
+- [02-design-decisions.md §2.4.4](./02-design-decisions.md) — Consensus attribute schema
+
+---
+
+## Task 4.4: Instrument Validation Handling
+
+**Objective**: Trace validation send and receive to show ledger validation flow.
+
+**What to do**:
+
+- Edit `src/xrpld/app/consensus/RCLConsensus.cpp` (or the validation handler):
+  - When sending our validation:
+    - Create `consensus.validation.send` span
+    - Set attributes: validated ledger hash, sequence, signing time
+
+  - When receiving a peer validation:
+    - Extract trace context from `TMValidation::trace_context` (if present)
+    - Create `consensus.validation.receive` span
+    - Set attributes: `xrpl.consensus.validator` (node ID), ledger hash
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+- `src/xrpld/app/misc/NetworkOPs.cpp` (if validation handling is here)
+
+---
+
+## Task 4.5: Add Consensus-Specific Attributes
+
+**Objective**: Enrich consensus spans with detailed attributes for debugging and analysis.
+
+**What to do**:
+
+- Review all consensus spans and ensure they include:
+  - `xrpl.consensus.ledger.seq` — target ledger sequence number
+  - `xrpl.consensus.round` — consensus round number
+  - `xrpl.consensus.mode` — proposing/observing/wrongLedger
+  - `xrpl.consensus.phase` — current phase name
+  - `xrpl.consensus.phase_duration_ms` — time spent in phase
+  - `xrpl.consensus.proposers` — number of trusted proposers
+  - `xrpl.consensus.tx_count` — transactions in proposed set
+  - `xrpl.consensus.disputes` — number of disputed transactions
+  - `xrpl.consensus.converge_percent` — convergence percentage
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+
+---
+
+## Task 4.6: Correlate Transaction and Consensus Traces
+
+**Objective**: Link transaction traces from Phase 3 with consensus traces so you can follow a transaction from submission through consensus into the ledger.
+
+**What to do**:
+
+- In `onClose()` or `onAccept()`:
+  - When building the consensus position, link the round span to individual transaction spans using span links (if OTel SDK supports it) or events
+  - At minimum, record the transaction hashes included in the consensus set as span events: `tx.included` with `xrpl.tx.hash` attribute
+
+- In `processTransactionSet()` (NetworkOPs):
+  - If the consensus round span context is available, create child spans for each transaction applied to the ledger
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+- `src/xrpld/app/misc/NetworkOPs.cpp`
+
+---
+
+## Task 4.7: Build Verification and Testing
+
+**Objective**: Verify all Phase 4 changes compile and don't affect consensus timing.
+
+**What to do**:
+
+1. Build with `telemetry=ON` — verify no compilation errors
+2. Build with `telemetry=OFF` — verify no regressions (critical for consensus code)
+3. Run existing consensus-related unit tests
+4. Verify that all macros expand to no-ops when disabled
+5. Check that no consensus-critical code paths are affected by instrumentation overhead
+
+**Verification Checklist**:
+
+- [ ] Build succeeds with telemetry ON
+- [ ] Build succeeds with telemetry OFF
+- [ ] Existing consensus tests pass
+- [ ] No new includes in consensus headers when telemetry is OFF
+- [ ] Phase timing instrumentation doesn't use blocking operations
+
+---
+
+## Summary
+
+| Task | Description                           | New Files | Modified Files | Depends On    |
+| ---- | ------------------------------------- | --------- | -------------- | ------------- |
+| 4.1  | Consensus round start instrumentation | 0         | 2              | Phase 3       |
+| 4.2  | Phase transition instrumentation      | 0         | 1-2            | 4.1           |
+| 4.3  | Proposal handling instrumentation     | 0         | 1              | 4.1           |
+| 4.4  | Validation handling instrumentation   | 0         | 1-2            | 4.1           |
+| 4.5  | Consensus-specific attributes         | 0         | 1              | 4.2, 4.3, 4.4 |
+| 4.6  | Transaction-consensus correlation     | 0         | 2              | 4.2, Phase 3  |
+| 4.7  | Build verification and testing        | 0         | 0              | 4.1-4.6       |
+
+**Parallel work**: Tasks 4.2, 4.3, and 4.4 can run in parallel after 4.1 is complete. Task 4.5 depends on all three. Task 4.6 depends on 4.2 and Phase 3.
+
+### Implemented Spans
+
+| Span Name                   | Method                             | Key Attributes                                                                                                                                                                                                        |
+| --------------------------- | ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `consensus.proposal.send`   | `Adaptor::propose`                 | `xrpl.consensus.round`                                                                                                                                                                                                |
+| `consensus.ledger_close`    | `Adaptor::onClose`                 | `xrpl.consensus.ledger.seq`, `xrpl.consensus.mode`                                                                                                                                                                    |
+| `consensus.accept`          | `Adaptor::onAccept`                | `xrpl.consensus.proposers`, `xrpl.consensus.round_time_ms`                                                                                                                                                            |
+| `consensus.accept.apply`    | `Adaptor::doAccept`                | `xrpl.consensus.close_time`, `close_time_correct`, `close_resolution_ms`, `state`, `proposing`, `round_time_ms`, `ledger.seq`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction` |
+| `consensus.validation.send` | `Adaptor::onAccept` (via validate) | `xrpl.consensus.proposing`                                                                                                                                                                                            |
+
+#### Close Time Attributes (consensus.accept.apply)
+
+The `consensus.accept.apply` span captures ledger close time agreement details
+driven by `avCT_CONSENSUS_PCT` (75% validator agreement threshold):
+
+- **`xrpl.consensus.close_time`** — Agreed-upon ledger close time (epoch seconds). When validators disagree (`consensusCloseTime == epoch`), this is synthetically set to `prevCloseTime + 1s`.
+- **`xrpl.consensus.close_time_correct`** — `true` if validators reached agreement, `false` if they "agreed to disagree" (close time forced to prev+1s).
+- **`xrpl.consensus.close_resolution_ms`** — Rounding granularity for close time (starts at 30s, decreases as ledger interval stabilizes).
+- **`xrpl.consensus.state`** — `"finished"` (normal) or `"moved_on"` (consensus failed, adopted best available).
+- **`xrpl.consensus.proposing`** — Whether this node was proposing.
+- **`xrpl.consensus.round_time_ms`** — Total consensus round duration.
+- **`xrpl.consensus.parent_close_time`** — Previous ledger's close time (epoch seconds). Enables computing close-time deltas across consecutive rounds without correlating separate spans.
+- **`xrpl.consensus.close_time_self`** — This node's own proposed close time before consensus voting.
+- **`xrpl.consensus.close_time_vote_bins`** — Number of distinct close-time vote bins from peer proposals. Higher values indicate less agreement among validators.
+- **`xrpl.consensus.resolution_direction`** — Whether close-time resolution `"increased"` (coarser), `"decreased"` (finer), or stayed `"unchanged"` relative to the previous ledger.
+
+**Exit Criteria** (from [06-implementation-phases.md §6.11.4](./06-implementation-phases.md)):
+
+- [x] Complete consensus round traces
+- [x] Phase transitions visible
+- [x] Proposals and validations traced
+- [x] Close time agreement tracked (per `avCT_CONSENSUS_PCT`)
+- [x] No impact on consensus timing
+
+---
+
+# Phase 4a: Establish-Phase Gap Fill & Cross-Node Correlation
+
+> **Goal**: Fill tracing gaps in the consensus establish phase (disputes, convergence,
+> threshold escalation, mode changes) and establish cross-node correlation using a
+> deterministic shared trace ID derived from `previousLedger.id()`.
+>
+> **Approach**: Direct instrumentation in `Consensus.h` — the generic consensus
+> template has full access to internal state (`convergePercent_`, `result_->disputes`,
+> `mode_`, threshold logic). Telemetry access comes via a single new adaptor
+> method `getTelemetry()`. Long-lived spans (round, establish) are stored as
+> class members using `SpanGuard` directly — NOT the `XRPL_TRACE_*` convenience
+> macros (which create local variables named `_xrpl_guard_`). Short-lived
+> scoped spans (update_positions, check) can use the macros. All code compiles
+> to no-ops when `XRPL_ENABLE_TELEMETRY` is not defined.
+>
+> **Branch**: `pratik/otel-phase4-consensus-tracing`
+
+## Design: Switchable Correlation Strategy
+
+Two strategies for cross-node trace correlation, switchable via config:
+
+### Strategy A — Deterministic Trace ID (Default)
+
+Derive `trace_id = SHA256(previousLedger.id())[0:16]` so all nodes in the same
+consensus round share the same trace_id without P2P context propagation.
+
+- **Pros**: All nodes appear in the same trace in Tempo/Jaeger automatically.
+  No collector-side post-processing needed.
+- **Cons**: Overrides OTel's random trace_id generation; requires custom
+  `IdGenerator` or manual span context construction.
+
+### Strategy B — Attribute-Based Correlation
+
+Use normal random trace_id but attach `xrpl.consensus.ledger_id` as an attribute
+on every consensus span. Correlation happens at query time via Tempo/Grafana
+`by attribute` queries.
+
+- **Pros**: Standard OTel trace_id semantics; no SDK customization.
+- **Cons**: Cross-node correlation requires query-time joins, not automatic.
+
+### Config
+
+```ini
+[telemetry]
+# "deterministic" (default) or "attribute"
+consensus_trace_strategy=deterministic
+```
+
+### Implementation
+
+In `RCLConsensus::Adaptor::startRound()`:
+
+- If `deterministic`:
+  1. Compute `trace_id_bytes = SHA256(prevLedgerID)[0:16]`
+  2. Construct `opentelemetry::trace::TraceId(trace_id_bytes)`
+  3. Create a synthetic `SpanContext` with this trace_id and a random span_id:
+     ```cpp
+     auto traceId = opentelemetry::trace::TraceId(trace_id_bytes);
+     auto spanId  = opentelemetry::trace::SpanId(random_8_bytes);
+     auto syntheticCtx = opentelemetry::trace::SpanContext(
+         traceId, spanId, opentelemetry::trace::TraceFlags(1), false);
+     ```
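+  4. Wrap `syntheticCtx` in a non-recording `opentelemetry::trace::DefaultSpan`
+     (`syntheticSpan`) and attach it to an `opentelemetry::context::Context` via
+     `opentelemetry::trace::SetSpan(context, syntheticSpan)`; the context it
+     returns is the `parentContext` used in the next step.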
+  5. Call `startSpan("consensus.round", parentContext)` so the new span
+     inherits the deterministic trace_id.
+- If `attribute`: start a normal `consensus.round` span, set
+  `xrpl.consensus.ledger_id = previousLedger.id()` as attribute.
+
+Both strategies always set `xrpl.consensus.round_id` (round number) and
+`xrpl.consensus.ledger_id` (previous ledger hash) as attributes.
+
+---
+
+## Design: Span Hierarchy
+
+```
+consensus.round  (root — created in RCLConsensus::startRound, closed at accept)
+│   link → previous round's SpanContext (follows-from)
+│
+├── consensus.establish  (phaseEstablish → acceptance, in Consensus.h)
+│   ├── consensus.update_positions  (each updateOurPositions call)
+│   │   └── consensus.dispute.resolve  (per-tx dispute resolution event)
+│   ├── consensus.check  (each haveConsensus call)
+│   └── consensus.mode_change  (short-lived span in adaptor on mode transition)
+│
+├── consensus.accept  (existing onAccept span — reparented under round)
+│
+└── consensus.validation.send  (existing — reparented, follows-from link to round)
+```
+
+### Span Links (follows-from relationships)
+
+| Link Source                               | Link Target                | Rationale                                                                      |
+| ----------------------------------------- | -------------------------- | ------------------------------------------------------------------------------ |
+| `consensus.round` (N+1)                   | `consensus.round` (N)      | Causal chain: round N+1 exists because round N accepted                        |
+| `consensus.validation.send`               | `consensus.round`          | Validation follows from the round that produced it; may outlive the round span |
+| _(Phase 4b)_ Received proposal processing | Sender's `consensus.round` | Cross-node causal link via P2P context propagation                             |
+
+---
+
+## Task 4a.0: Prerequisites — Extend SpanGuard and Telemetry APIs
+
+**Objective**: Add missing API surface needed by later tasks.
+
+**What to do**:
+
+1. **Add `SpanGuard::addEvent()` with attributes** (needed by Task 4a.5):
+   The current `addEvent(string_view name)` only accepts a name. Add an
+   overload that accepts key-value attributes:
+
+   ```cpp
+   void addEvent(std::string_view name,
+       std::initializer_list<
+           std::pair<opentelemetry::nostd::string_view,
+                     opentelemetry::common::AttributeValue>> attributes)
+   {
+       span_->AddEvent(std::string(name), attributes);
+   }
+   ```
+
+2. **Add a `Telemetry::startSpan()` overload that accepts span links** (needed by Tasks 4a.2, 4a.8):
+   The current `startSpan()` has no span link support. Add an overload that
+   accepts a vector of `SpanContext` links for follows-from relationships:
+
+   ```cpp
+   virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
+   startSpan(
+       std::string_view name,
+       opentelemetry::context::Context const& parentContext,
+       std::vector<opentelemetry::trace::SpanContext> const& links,
+       opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
+   ```
+
+3. **Add `XRPL_TRACE_ADD_EVENT` macro** (needed by Task 4a.5):
+   Add to `TracingInstrumentation.h` to expose `addEvent(name, attrs)` through
+   the macro interface (consistent with `XRPL_TRACE_SET_ATTR` pattern):
+   ```cpp
+   #ifdef XRPL_ENABLE_TELEMETRY
+   #define XRPL_TRACE_ADD_EVENT(name, ...)                \
+       do                                                 \
+       {                                                  \
+           if (_xrpl_guard_.has_value())                  \
+           {                                              \
+               _xrpl_guard_->addEvent(name, __VA_ARGS__); \
+           }                                              \
+       } while (0)
+   #else
+   #define XRPL_TRACE_ADD_EVENT(name, ...) ((void)0)
+   #endif
+   ```
+
+**Key modified files**:
+
+- `include/xrpl/telemetry/SpanGuard.h` — add `addEvent()` overload
+- `include/xrpl/telemetry/Telemetry.h` — add `startSpan()` with links
+- `src/xrpld/telemetry/Telemetry.cpp` — implement new overload
+- `src/xrpld/telemetry/NullTelemetry.cpp` — no-op implementation
+- `src/xrpld/telemetry/TracingInstrumentation.h` — add `XRPL_TRACE_ADD_EVENT` macro
+
+---
+
+## Task 4a.1: Adaptor `getTelemetry()` Method
+
+**Objective**: Give `Consensus.h` access to the telemetry subsystem without
+coupling the generic template to OTel headers.
+
+**What to do**:
+
+- Add `getTelemetry()` method to the Adaptor concept (returns
+  `xrpl::telemetry::Telemetry&`). The return type is already forward-declared
+  behind `#ifdef XRPL_ENABLE_TELEMETRY`.
+- Implement in `RCLConsensus::Adaptor` — delegates to `app_.getTelemetry()`.
+- In `Consensus.h`, the `XRPL_TRACE_*` macros call
+  `adaptor_.getTelemetry()` — when telemetry is disabled, the macros expand to
+  `((void)0)` and the method is never called.
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.h` — declare `getTelemetry()`
+- `src/xrpld/app/consensus/RCLConsensus.cpp` — implement `getTelemetry()`
+
+---
+
+## Task 4a.2: Switchable Round Span with Deterministic Trace ID
+
+**Objective**: Create a `consensus.round` root span in `startRound()` that uses
+the switchable correlation strategy. Store span context as a member for child
+spans in `Consensus.h`.
+
+**What to do**:
+
+- In `RCLConsensus::Adaptor::startRound()` (or a new helper):
+  - Read `consensus_trace_strategy` from config.
+  - **Deterministic**: compute `trace_id = SHA256(prevLedgerID)[0:16]`.
+    Construct a `SpanContext` with this trace_id, then start
+    `consensus.round` span as child of that context.
+  - **Attribute**: start normal `consensus.round` span.
+  - Set attributes on both: `xrpl.consensus.round_id`,
+    `xrpl.consensus.ledger_id`, `xrpl.consensus.ledger.seq`,
+    `xrpl.consensus.mode`.
+  - Store the round span in `Consensus` as a member (see Task 4a.3).
+  - If a previous round's span context is available, add a **span link**
+    (follows-from) to establish the round chain.
+
+- Add `createDeterministicTraceId(hash)` utility to
+  `include/xrpl/telemetry/Telemetry.h` (returns 16-byte trace ID from a
+  256-bit hash by truncation).
+
+- Add `consensus_trace_strategy` to `Telemetry::Setup` and
+  `TelemetryConfig.cpp` parser:
+  ```cpp
+  /** Cross-node correlation strategy: "deterministic" or "attribute". */
+  std::string consensusTraceStrategy = "deterministic";
+  ```
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp`
+- `include/xrpl/telemetry/Telemetry.h` — `createDeterministicTraceId()`
+- `src/xrpld/telemetry/TelemetryConfig.cpp` — parse new config option
+
+---
+
+## Task 4a.3: Span Members in `Consensus.h`
+
+**Objective**: Add span storage to the `Consensus` class so that spans created
+in `startRound()` (adaptor) are accessible from `phaseEstablish()`,
+`updateOurPositions()`, and `haveConsensus()` (template methods).
+
+**What to do**:
+
+- Add to `Consensus` private members (guarded by `#ifdef XRPL_ENABLE_TELEMETRY`):
+  ```cpp
+  #ifdef XRPL_ENABLE_TELEMETRY
+  std::optional<xrpl::telemetry::SpanGuard> roundSpan_;
+  std::optional<xrpl::telemetry::SpanGuard> establishSpan_;
+  opentelemetry::context::Context prevRoundContext_;
+  #endif
+  ```
+- `roundSpan_` is created in `startRound()` via the adaptor and stored.
+  Its `SpanGuard::Scope` member keeps the span active on the thread context
+  for the entire round lifetime.
+- `establishSpan_` is created when entering phaseEstablish and cleared on accept.
+  It becomes a child of `roundSpan_` via OTel's thread-local context propagation.
+- `prevRoundContext_` stores the previous round's context for follows-from links.
+
+**Threading assumption**: `startRound()`, `phaseEstablish()`, `updateOurPositions()`,
+and `haveConsensus()` all run on the same thread (the consensus job queue thread).
+This is required for the `SpanGuard::Scope`-based parent-child hierarchy to work.
+The `Consensus` class documentation confirms it is NOT thread-safe and calls are
+serialized by the application.
+
+- Add conditional include at top of `Consensus.h`:
+  ```cpp
+  #ifdef XRPL_ENABLE_TELEMETRY
+  #include <xrpl/telemetry/SpanGuard.h>
+  #include <xrpld/telemetry/TracingInstrumentation.h>
+  #endif
+  ```
+
+**Key modified files**:
+
+- `src/xrpld/consensus/Consensus.h`
+
+---
+
+## Task 4a.4: Instrument `phaseEstablish()`
+
+**Objective**: Create `consensus.establish` span wrapping the establish phase,
+with attributes for convergence progress.
+
+**What to do**:
+
+- At the start of `phaseEstablish()` (line 1298), if `establishSpan_` is not
+  yet created, create it as child of `roundSpan_` using the **direct API**
+  (NOT the `XRPL_TRACE_CONSENSUS` macro, which creates a local variable):
+
+  ```cpp
+  #ifdef XRPL_ENABLE_TELEMETRY
+  if (!establishSpan_ && adaptor_.getTelemetry().shouldTraceConsensus())
+  {
+      establishSpan_.emplace(
+          adaptor_.getTelemetry().startSpan("consensus.establish"));
+  }
+  #endif
+  ```
+
+- Set attributes on each call:
+  - `xrpl.consensus.converge_percent` — `convergePercent_`
+  - `xrpl.consensus.establish_count` — `establishCounter_`
+  - `xrpl.consensus.proposers` — `currPeerPositions_.size()`
+
+- On phase exit (transition to accept), close the establish span and record
+  final duration.
+
+**Key modified files**:
+
+- `src/xrpld/consensus/Consensus.h` — `phaseEstablish()` method
+
+---
+
+## Task 4a.5: Instrument `updateOurPositions()`
+
+**Objective**: Trace each position update cycle including dispute resolution
+details.
+
+**What to do**:
+
+- At the start of `updateOurPositions()` (line 1418), create a scoped child
+  span. This method is called and returns within a single `phaseEstablish()`
+  call, so the `XRPL_TRACE_CONSENSUS` macro works here (scoped local):
+
+  ```cpp
+  XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.update_positions");
+  ```
+
+- Set attributes:
+  - `xrpl.consensus.disputes_count` — `result_->disputes.size()`
+  - `xrpl.consensus.converge_percent` — current convergence
+  - `xrpl.consensus.proposers_agreed` — count of peers with same position
+  - `xrpl.consensus.proposers_total` — total peer positions
+
+- Inside the dispute resolution loop, for each dispute that changes our vote,
+  add an **event** with attributes using `XRPL_TRACE_ADD_EVENT` (from Task 4a.0):
+  ```cpp
+  XRPL_TRACE_ADD_EVENT("dispute.resolve", {
+      {"xrpl.tx.id", std::string(tx_id)},
+      {"xrpl.dispute.our_vote", our_vote},
+      {"xrpl.dispute.yays", static_cast<int64_t>(yays)},
+      {"xrpl.dispute.nays", static_cast<int64_t>(nays)}
+  });
+  ```
+
+**Key modified files**:
+
+- `src/xrpld/consensus/Consensus.h` — `updateOurPositions()` method
+
+---
+
+## Task 4a.6: Instrument `haveConsensus()` (Threshold & Convergence)
+
+**Objective**: Trace consensus checking including threshold escalation
+(`ConsensusParms::AvalancheState::{init, mid, late, stuck}`).
+
+**What to do**:
+
+- At the start of `haveConsensus()` (line 1598), create a scoped child span:
+
+  ```cpp
+  XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.check");
+  ```
+
+- Set attributes:
+  - `xrpl.consensus.agree_count` — peers that agree with our position
+  - `xrpl.consensus.disagree_count` — peers that disagree
+  - `xrpl.consensus.converge_percent` — convergence percentage
+  - `xrpl.consensus.result` — ConsensusState result (Yes/No/MovedOn)
+
+- The free function `checkConsensus()` in `Consensus.cpp` (line 151) determines
+  thresholds based on `currentAgreeTime`. Threshold values come from
+  `ConsensusParms::avalancheCutoffs` (defined in `ConsensusParms.h`).
+  The escalation states are `ConsensusParms::AvalancheState::{init, mid, late, stuck}`.
+  Record the effective threshold as an attribute on the span:
+  - `xrpl.consensus.threshold_percent` — current threshold from `avalancheCutoffs`
+
+**Key modified files**:
+
+- `src/xrpld/consensus/Consensus.h` — `haveConsensus()` method
+
+---
+
+## Task 4a.7: Instrument Mode Changes
+
+**Objective**: Trace consensus mode transitions (proposing ↔ observing,
+wrongLedger, switchedLedger).
+
+**What to do**:
+
+Mode changes are rare (typically 0-1 per round), so a **standalone short-lived
+span** is appropriate (not an event). This captures timing of the mode change
+itself.
+
+- In `RCLConsensus::Adaptor::onModeChange()`, create a scoped span:
+
+  ```cpp
+  XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.mode_change");
+  XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.old", to_string(before).c_str());
+  XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.new", to_string(after).c_str());
+  ```
+
+- Note: `MonitoredMode::set()` (line 304 in `Consensus.h`) calls
+  `adaptor_.onModeChange(before, after)` — so the span is created in the
+  adaptor, which already has telemetry access. No instrumentation needed
+  in `Consensus.h` for this task.
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp` — `onModeChange()`
+
+---
+
+## Task 4a.8: Reparent Existing Spans Under Round
+
+**Objective**: Make existing consensus spans (`consensus.accept`,
+`consensus.accept.apply`, `consensus.validation.send`) children of the
+`consensus.round` root span instead of being standalone.
+
+**What to do**:
+
+- The existing spans in `onAccept()`, `doAccept()`, and `validate()` use
+  `XRPL_TRACE_CONSENSUS(app_.getTelemetry(), ...)` which creates standalone
+  spans on the current thread's context.
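+- After Task 4a.2 creates the round span and stores it, methods that run on
+  the consensus thread within the round span's scope automatically become
+  children. Work that runs on the jtACCEPT worker thread (e.g. creating the
+  validation span) does not see that thread-local scope and must be parented
+  or linked explicitly using the round's captured `SpanContext` (see
+  Implementation Notes). Verify the resulting hierarchy for each span.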
+- For `consensus.validation.send`: add a **span link** (follows-from) to the
+  round span context, since the validation may be processed after the round
+  completes.
+
+**Key modified files**:
+
+- `src/xrpld/app/consensus/RCLConsensus.cpp` — verify parent-child hierarchy
+
+---
+
+## Task 4a.9: Build Verification and Testing
+
+**Objective**: Verify all Phase 4a changes compile cleanly with telemetry ON
+and OFF, and don't affect consensus timing.
+
+**What to do**:
+
+1. Build with `telemetry=ON` — verify no compilation errors
+2. Build with `telemetry=OFF` — verify macros expand to no-ops, no new includes
+   leak into `Consensus.h` when disabled
+3. Run existing consensus unit tests
+4. Verify `#ifdef XRPL_ENABLE_TELEMETRY` guards on all new members in
+   `Consensus.h`
+5. Run `pccl` pre-commit checks
+
+**Verification Checklist**:
+
+- [x] Build succeeds with telemetry ON
+- [x] Build succeeds with telemetry OFF
+- [x] Existing consensus tests pass
+- [x] `Consensus.h` has zero OTel includes when telemetry is OFF
+- [x] No new virtual calls in hot consensus paths
+- [x] `pccl` passes
+
+---
+
+## Phase 4a Summary
+
+| Task | Description                                      | New Files | Modified Files | Depends On |
+| ---- | ------------------------------------------------ | --------- | -------------- | ---------- |
+| 4a.0 | Prerequisites: extend SpanGuard & Telemetry APIs | 0         | 5              | Phase 4    |
+| 4a.1 | Adaptor `getTelemetry()` method                  | 0         | 2              | Phase 4    |
+| 4a.2 | Switchable round span with deterministic traceID | 0         | 3              | 4a.0, 4a.1 |
+| 4a.3 | Span members in `Consensus.h`                    | 0         | 1              | 4a.1       |
+| 4a.4 | Instrument `phaseEstablish()`                    | 0         | 1              | 4a.3       |
+| 4a.5 | Instrument `updateOurPositions()`                | 0         | 1              | 4a.0, 4a.3 |
+| 4a.6 | Instrument `haveConsensus()` (thresholds)        | 0         | 1              | 4a.3       |
+| 4a.7 | Instrument mode changes                          | 0         | 1              | 4a.1       |
+| 4a.8 | Reparent existing spans under round              | 0         | 1              | 4a.0, 4a.2 |
+| 4a.9 | Build verification and testing                   | 0         | 0              | 4a.0-4a.8  |
+
+**Parallel work**: Tasks 4a.0 and 4a.1 can run in parallel. Task 4a.7 can start as soon as 4a.1 is done. Tasks 4a.4, 4a.5, and 4a.6 can run in parallel after 4a.3 (4a.5 additionally requires 4a.0).
+
+### New Spans (Phase 4a)
+
+| Span Name                    | Location           | Key Attributes                                                                     |
+| ---------------------------- | ------------------ | ---------------------------------------------------------------------------------- |
+| `consensus.round`            | `RCLConsensus.cpp` | `round_id`, `ledger_id`, `ledger.seq`, `mode`; link → prev round                   |
+| `consensus.establish`        | `Consensus.h`      | `converge_percent`, `establish_count`, `proposers`                                 |
+| `consensus.update_positions` | `Consensus.h`      | `disputes_count`, `converge_percent`, `proposers_agreed`, `proposers_total`        |
+| `consensus.check`            | `Consensus.h`      | `agree_count`, `disagree_count`, `converge_percent`, `result`, `threshold_percent` |
+| `consensus.mode_change`      | `RCLConsensus.cpp` | `mode.old`, `mode.new`                                                             |
+
+### New Events (Phase 4a)
+
+| Event Name        | Parent Span                  | Attributes                          |
+| ----------------- | ---------------------------- | ----------------------------------- |
+| `dispute.resolve` | `consensus.update_positions` | `tx_id`, `our_vote`, `yays`, `nays` |
+
+### New Attributes (Phase 4a)
+
+```cpp
+// Round-level (on consensus.round)
+"xrpl.consensus.round_id"              = int64    // Consensus round number
+"xrpl.consensus.ledger_id"             = string   // previousLedger.id() hash
+"xrpl.consensus.trace_strategy"        = string   // "deterministic" or "attribute"
+
+// Establish-level
+"xrpl.consensus.converge_percent"      = int64    // Convergence % (0-100+)
+"xrpl.consensus.establish_count"       = int64    // Number of establish iterations
+"xrpl.consensus.disputes_count"        = int64    // Active disputes
+"xrpl.consensus.proposers_agreed"      = int64    // Peers agreeing with us
+"xrpl.consensus.proposers_total"       = int64    // Total peer positions
+"xrpl.consensus.agree_count"           = int64    // Peers that agree (haveConsensus)
+"xrpl.consensus.disagree_count"        = int64    // Peers that disagree
+"xrpl.consensus.threshold_percent"     = int64    // Current threshold (50/65/70/95)
+"xrpl.consensus.result"                = string   // "yes", "no", "moved_on"
+
+// Mode change
+"xrpl.consensus.mode.old"              = string   // Previous mode
+"xrpl.consensus.mode.new"              = string   // New mode
+```
+
+### Implementation Notes
+
+- **Separation of concerns**: All non-trivial telemetry code extracted to private
+  helpers (`startRoundTracing`, `createValidationSpan`, `startEstablishTracing`,
+  `updateEstablishTracing`, `endEstablishTracing`). Business logic methods contain
+  only single-line `#ifdef` blocks calling these helpers.
+- **Thread safety**: `createValidationSpan()` runs on the jtACCEPT worker thread.
+  Instead of accessing `roundSpan_` across threads, a `roundSpanContext_` snapshot
+  (lightweight `SpanContext` value type) is captured on the consensus thread in
+  `startRoundTracing()` and read by `createValidationSpan()`. The job queue
+  provides the happens-before guarantee.
+- **Macro safety**: `XRPL_TRACE_ADD_EVENT` uses `do { } while (0)` to prevent
+  dangling-else issues.
+- **Config validation**: `consensus_trace_strategy` is validated to be either
+  `"deterministic"` or `"attribute"`, falling back to `"deterministic"` for
+  unrecognised values.
+- **Plan deviation**: `roundSpan_` is stored in `RCLConsensus::Adaptor` (not
+  `Consensus.h`) because the adaptor has access to telemetry config and can
+  implement the deterministic trace ID strategy. `establishSpan_` is correctly
+  in `Consensus.h` as planned.
+
+---
+
+# Phase 4b: Cross-Node Propagation (Future — Documentation Only)
+
+> **Goal**: Wire `TraceContextPropagator` for P2P messages so that proposals
+> and validations carry trace context between nodes. This enables true
+> distributed tracing where a proposal sent by Node A creates a child span
+> on Node B.
+>
+> **Status**: NOT IMPLEMENTED. The protobuf fields and propagator class exist
+> but are not wired. This section documents the design for future work.
+
+## Architecture
+
+```
+Node A (proposing)                         Node B (receiving)
+─────────────────                         ──────────────────
+consensus.round                           consensus.round
+├── propose()                             ├── peerProposal()
+│   └── TraceContextPropagator            │   └── TraceContextPropagator
+│       ::injectToProtobuf(               │       ::extractFromProtobuf(
+│           TMProposeSet.trace_context)   │           TMProposeSet.trace_context)
+│                                         │   └── span link → Node A's context
+└── validate()                            └── onValidation()
+    └── inject into TMValidation              └── extract from TMValidation
+```
+
+## Wiring Points
+
+| Message         | Inject Location                    | Extract Location                    | Protobuf Field             |
+| --------------- | ---------------------------------- | ----------------------------------- | -------------------------- |
+| `TMProposeSet`  | `Adaptor::propose()`               | `PeerImp::onMessage(TMProposeSet)`  | field 1001: `TraceContext` |
+| `TMValidation`  | `Adaptor::validate()`              | `PeerImp::onMessage(TMValidation)`  | field 1001: `TraceContext` |
+| `TMTransaction` | `NetworkOPs::processTransaction()` | `PeerImp::onMessage(TMTransaction)` | field 1001: `TraceContext` |
+
+## Span Link Semantics
+
+Received messages use **span links** (follows-from), NOT parent-child:
+
+- The receiver's processing span links to the sender's context
+- This preserves each node's independent trace tree
+- Cross-node correlation visible via linked traces in Tempo/Jaeger
+
+## Interaction with Deterministic Trace ID (Strategy A)
+
+When using deterministic trace_id (Phase 4a default), cross-node spans already
+share the same trace_id. P2P propagation adds **span-level** linking:
+
+- Without propagation: spans from different nodes appear in the same trace
+  (same trace_id) but without parent-child or follows-from relationships.
+- With propagation: spans have explicit links showing which proposal/validation
+  from Node A caused processing on Node B.
+
+## Prerequisites
+
+- Phase 4a (this task list) — establish phase tracing must be in place
+- `TraceContextPropagator` class (already exists in
+  `include/xrpl/telemetry/TraceContextPropagator.h`)
+- Protobuf `TraceContext` message (already exists, field 1001)
--- a/OpenTelemetryPlan/Phase5_taskList.md
+++ b/OpenTelemetryPlan/Phase5_taskList.md
@@ -0,0 +1,241 @@
+# Phase 5: Documentation & Deployment Task List
+
+> **Goal**: Production readiness — Grafana dashboards, spanmetrics pipeline, operator runbook, alert definitions, and final integration testing. This phase ensures the telemetry system is useful and maintainable in production.
+>
+> **Scope**: Grafana dashboard definitions, OTel Collector spanmetrics connector, Prometheus integration, alert rules, operator documentation, and production-ready Docker Compose stack.
+>
+> **Branch**: `pratik/otel-phase5-docs-deployment` (from `pratik/otel-phase4-consensus-tracing`)
+
+### Related Plan Documents
+
+| Document                                                         | Relevance                                                                  |
+| ---------------------------------------------------------------- | -------------------------------------------------------------------------- |
+| [07-observability-backends.md](./07-observability-backends.md)   | Jaeger setup (§7.1), Grafana dashboards (§7.6), alerts (§7.6.3)            |
+| [05-configuration-reference.md](./05-configuration-reference.md) | Collector config (§5.5), production config (§5.5.2), Docker Compose (§5.6) |
+| [06-implementation-phases.md](./06-implementation-phases.md)     | Phase 5 tasks (§6.6), definition of done (§6.11.5)                         |
+
+---
+
+## Task 5.1: Add Spanmetrics Connector to OTel Collector
+
+**Objective**: Derive RED metrics (Rate, Errors, Duration) from trace spans automatically, enabling Grafana time-series dashboards.
+
+**What to do**:
+
+- Edit `docker/telemetry/otel-collector-config.yaml`:
+  - Add `spanmetrics` connector:
+    ```yaml
+    connectors:
+      spanmetrics:
+        histogram:
+          explicit:
+            buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 5s]
+        dimensions:
+          - name: xrpl.rpc.command
+          - name: xrpl.rpc.status
+          - name: xrpl.consensus.phase
+          - name: xrpl.tx.type
+    ```
+  - Add `prometheus` exporter:
+    ```yaml
+    exporters:
+      prometheus:
+        endpoint: 0.0.0.0:8889
+    ```
+  - Wire the pipeline:
+    ```yaml
+    service:
+      pipelines:
+        traces:
+          receivers: [otlp]
+          processors: [batch]
+          exporters: [debug, otlp/jaeger, spanmetrics]
+        metrics:
+          receivers: [spanmetrics]
+          exporters: [prometheus]
+    ```
+
+- Edit `docker/telemetry/docker-compose.yml`:
+  - Expose port `8889` on the collector for Prometheus scraping
+  - Add Prometheus service
+  - Add Prometheus as Grafana datasource
+
+**Key modified files**:
+
+- `docker/telemetry/otel-collector-config.yaml`
+- `docker/telemetry/docker-compose.yml`
+
+**Key new files**:
+
+- `docker/telemetry/prometheus.yml` (Prometheus scrape config)
+- `docker/telemetry/grafana/provisioning/datasources/prometheus.yaml`
+
+**Reference**:
+
+- [POC_taskList.md §Next Steps](./POC_taskList.md) — Metrics pipeline for Grafana dashboards
+
+---
+
+## Task 5.2: Create Grafana Dashboards
+
+**Objective**: Provide pre-built Grafana dashboards for RPC performance, transaction lifecycle, and consensus health.
+
+**What to do**:
+
+- Create `docker/telemetry/grafana/provisioning/dashboards/dashboards.yaml` (provisioning config)
+- Create dashboard JSON files:
+  1. **RPC Performance Dashboard** (`rpc-performance.json`):
+     - RPC request latency (p50/p95/p99) by command — histogram panel
+     - RPC throughput (requests/sec) by command — time series
+     - RPC error rate by command — bar gauge
+     - Top slowest RPC commands — table
+
+  2. **Transaction Overview Dashboard** (`transaction-overview.json`):
+     - Transaction processing rate — time series
+     - Transaction latency distribution — histogram
+     - Suppression rate (duplicates) — stat panel
+     - Transaction processing path (sync vs async) — pie chart
+
+  3. **Consensus Health Dashboard** (`consensus-health.json`):
+     - Consensus round duration — time series
+     - Phase duration breakdown (open/establish/accept) — stacked bar
+     - Proposals sent/received per round — stat panel
+     - Consensus mode distribution (proposing/observing) — pie chart
+
+- Store dashboards in `docker/telemetry/grafana/dashboards/`
+
+**Key new files**:
+
+- `docker/telemetry/grafana/provisioning/dashboards/dashboards.yaml`
+- `docker/telemetry/grafana/dashboards/rpc-performance.json`
+- `docker/telemetry/grafana/dashboards/transaction-overview.json`
+- `docker/telemetry/grafana/dashboards/consensus-health.json`
+
+**Reference**:
+
+- [07-observability-backends.md §7.6](./07-observability-backends.md) — Grafana dashboard specifications
+- [01-architecture-analysis.md §1.8.3](./01-architecture-analysis.md) — Dashboard panel examples
+
+---
+
+## Task 5.3: Define Alert Rules
+
+**Objective**: Create alert definitions for key telemetry anomalies.
+
+**What to do**:
+
+- Create `docker/telemetry/grafana/provisioning/alerting/alerts.yaml`:
+  - **RPC Latency Alert**: p99 latency > 1s for any command over 5 minutes
+  - **RPC Error Rate Alert**: Error rate > 5% for any command over 5 minutes
+  - **Consensus Duration Alert**: Round duration > 10s (warn), > 30s (critical)
+  - **Transaction Processing Alert**: Processing rate drops below threshold
+  - **Telemetry Pipeline Health**: No spans received for > 2 minutes
+
+**Key new files**:
+
+- `docker/telemetry/grafana/provisioning/alerting/alerts.yaml`
+
+**Reference**:
+
+- [07-observability-backends.md §7.6.3](./07-observability-backends.md) — Alert rule definitions
+
+---
+
+## Task 5.4: Production Collector Configuration
+
+**Objective**: Create a production-ready OTel Collector configuration with tail-based sampling and resource limits.
+
+**What to do**:
+
+- Create `docker/telemetry/otel-collector-config-production.yaml`:
+  - Tail-based sampling policy:
+    - Always sample errors and slow traces
+    - 10% base sampling rate for normal traces
+    - Always sample first trace for each unique RPC command
+  - Resource limits:
+    - Memory limiter processor (80% of available memory)
+    - Exporter sending queue and retry-on-failure settings (`sending_queue`, `retry_on_failure`) for export failures
+  - TLS configuration for production endpoints
+  - Health check endpoint
+
+**Key new files**:
+
+- `docker/telemetry/otel-collector-config-production.yaml`
+
+**Reference**:
+
+- [05-configuration-reference.md §5.5.2](./05-configuration-reference.md) — Production collector config
+
+---
+
+## Task 5.5: Operator Runbook
+
+**Objective**: Create operator documentation for managing the telemetry system in production.
+
+**What to do**:
+
+- Create `docs/telemetry-runbook.md`:
+  - **Setup**: How to enable telemetry in rippled
+  - **Configuration**: All config options with descriptions
+  - **Collector Deployment**: Docker Compose vs. Kubernetes vs. bare metal
+  - **Troubleshooting**: Common issues and resolutions
+    - No traces appearing
+    - High memory usage from telemetry
+    - Collector connection failures
+    - Sampling configuration tuning
+  - **Performance Tuning**: Batch size, queue size, sampling ratio guidelines
+  - **Upgrading**: How to upgrade OTel SDK and Collector versions
+
+**Key new files**:
+
+- `docs/telemetry-runbook.md`
+
+---
+
+## Task 5.6: Final Integration Testing
+
+**Objective**: Validate the complete telemetry stack end-to-end.
+
+**What to do**:
+
+1. Start full Docker stack (Collector, Jaeger, Grafana, Prometheus)
+2. Build rippled with `telemetry=ON`
+3. Run in standalone mode with telemetry enabled
+4. Generate RPC traffic and verify traces in Jaeger
+5. Verify dashboards populate in Grafana
+6. Verify alerts trigger correctly
+7. Test telemetry OFF path (no regressions)
+8. Run full test suite
+
+**Verification Checklist**:
+
+- [ ] Docker stack starts without errors
+- [ ] Traces appear in Jaeger with correct hierarchy
+- [ ] Grafana dashboards show metrics derived from spans
+- [ ] Prometheus scrapes spanmetrics successfully
+- [ ] Alerts can be triggered by simulated conditions
+- [ ] Build succeeds with telemetry ON and OFF
+- [ ] Full test suite passes
+
+---
+
+## Summary
+
+| Task | Description                        | New Files | Modified Files | Depends On |
+| ---- | ---------------------------------- | --------- | -------------- | ---------- |
+| 5.1  | Spanmetrics connector + Prometheus | 2         | 2              | Phase 4    |
+| 5.2  | Grafana dashboards                 | 4         | 0              | 5.1        |
+| 5.3  | Alert definitions                  | 1         | 0              | 5.1        |
+| 5.4  | Production collector config        | 1         | 0              | Phase 4    |
+| 5.5  | Operator runbook                   | 1         | 0              | Phase 4    |
+| 5.6  | Final integration testing          | 0         | 0              | 5.1-5.5    |
+
+**Parallel work**: Tasks 5.1, 5.4, and 5.5 can run in parallel. Tasks 5.2 and 5.3 depend on 5.1. Task 5.6 depends on all others.
+
+**Exit Criteria** (from [06-implementation-phases.md §6.11.5](./06-implementation-phases.md)):
+
+- [ ] Dashboards deployed and showing data
+- [ ] Alerts configured and tested
+- [ ] Operator documentation complete
+- [ ] Production collector config ready
+- [ ] Full test suite passes
--- a/OpenTelemetryPlan/presentation.md
+++ b/OpenTelemetryPlan/presentation.md
@@ -0,0 +1,673 @@
+# OpenTelemetry Distributed Tracing for rippled
+
+---
+
+## Slide 1: Introduction
+
+> **CNCF** = Cloud Native Computing Foundation
+
+### What is OpenTelemetry?
+
+OpenTelemetry is an open-source, CNCF-backed observability framework for distributed tracing, metrics, and logs.
+
+### Why OpenTelemetry for rippled?
+
+- **End-to-End Transaction Visibility**: Track transactions from submission → consensus → ledger inclusion
+- **Cross-Node Correlation**: Follow requests across multiple independent nodes using a unique `trace_id`
+- **Consensus Round Analysis**: Understand timing and behavior across validators
+- **Incident Debugging**: Correlate events across distributed nodes during issues
+
+```mermaid
+flowchart LR
+    A["Node A<br/>tx.receive<br/>trace_id: abc123"] --> B["Node B<br/>tx.relay<br/>trace_id: abc123"] --> C["Node C<br/>tx.validate<br/>trace_id: abc123"] --> D["Node D<br/>ledger.apply<br/>trace_id: abc123"]
+
+    style A fill:#1565c0,stroke:#0d47a1,color:#fff
+    style B fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style C fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style D fill:#e65100,stroke:#bf360c,color:#fff
+```
+
+**Reading the diagram:**
+
+- **Node A (blue, leftmost)**: The originating node that first receives the transaction and assigns a new `trace_id: abc123`; this ID becomes the correlation key for the entire distributed trace.
+- **Node B and Node C (green, middle)**: Relay and validation nodes — each creates its own span but carries the same `trace_id`, so their work is linked to the original submission without any central coordinator.
+- **Node D (orange, rightmost)**: The final node that applies the transaction to the ledger; the trace now spans the full lifecycle from submission to ledger inclusion.
+- **Left-to-right flow**: The horizontal progression shows the real-world message path — a transaction hops from node to node, and the shared `trace_id` stitches all hops into a single queryable trace.
+
+> **Trace ID: abc123** — All nodes share the same trace, enabling cross-node correlation.
+
+---
+
+## Slide 2: OpenTelemetry vs Open Source Alternatives
+
+> **CNCF** = Cloud Native Computing Foundation
+
+| Feature             | OpenTelemetry    | Jaeger           | Zipkin             | SkyWalking | Pinpoint   | Prometheus |
+| ------------------- | ---------------- | ---------------- | ------------------ | ---------- | ---------- | ---------- |
+| **Tracing**         | YES              | YES              | YES                | YES        | YES        | NO         |
+| **Metrics**         | YES              | NO               | NO                 | YES        | YES        | YES        |
+| **Logs**            | YES              | NO               | NO                 | YES        | NO         | NO         |
+| **C++ SDK**         | YES Official     | YES (Deprecated) | YES (Unmaintained) | NO         | NO         | YES        |
+| **Vendor Neutral**  | YES Primary goal | NO               | NO                 | NO         | NO         | NO         |
+| **Instrumentation** | Manual + Auto    | Manual           | Manual             | Auto-first | Auto-first | Manual     |
+| **Backend**         | Any (exporters)  | Self             | Self               | Self       | Self       | Self       |
+| **CNCF Status**     | Incubating       | Graduated        | NO                 | NO         | NO         | Graduated  |
+
+> **Why OpenTelemetry?** It's the only actively maintained, full-featured C++ option with vendor neutrality — allowing export to Tempo, Prometheus, Grafana, or any commercial backend without changing instrumentation.
+
+---
+
+## Slide 3: Adoption Scope — Traces Only (Current Plan)
+
+OpenTelemetry supports three signal types: **Traces**, **Metrics**, and **Logs**. rippled already captures metrics (StatsD via Beast Insight) and logs (Journal/PerfLog). The question is: how much of OTel do we adopt?
+
+> **Scenario A**: Add distributed tracing. Keep StatsD for metrics and Journal for logs.
+
+```mermaid
+flowchart LR
+    subgraph rippled["rippled Process"]
+        direction TB
+        OTel["OTel SDK<br/>(Traces)"]
+        Insight["Beast Insight<br/>(StatsD Metrics)"]
+        Journal["Journal + PerfLog<br/>(Logging)"]
+    end
+
+    OTel -->|"OTLP"| Collector["OTel Collector"]
+    Insight -->|"UDP"| StatsD["StatsD Server"]
+    Journal -->|"File I/O"| LogFile["perf.log / debug.log"]
+
+    Collector --> Tempo["Tempo / Jaeger"]
+    StatsD --> Graphite["Graphite / Grafana"]
+    LogFile --> Loki["Loki (optional)"]
+
+    style rippled fill:#424242,stroke:#212121,color:#fff
+    style OTel fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style Insight fill:#1565c0,stroke:#0d47a1,color:#fff
+    style Journal fill:#e65100,stroke:#bf360c,color:#fff
+    style Collector fill:#2e7d32,stroke:#1b5e20,color:#fff
+```
+
+| Aspect                         | Details                                                                                                         |
+| ------------------------------ | --------------------------------------------------------------------------------------------------------------- |
+| **What changes for operators** | Deploy OTel Collector + trace backend. Existing StatsD and log pipelines stay as-is.                            |
+| **Codebase impact**            | New `Telemetry` module (~1500 LOC). Beast Insight and Journal untouched.                                        |
+| **New capabilities**           | Cross-node trace correlation, span-based debugging, request lifecycle visibility.                               |
+| **What we still can't do**     | Correlate metrics with specific traces natively. StatsD metrics remain fire-and-forget with no trace exemplars. |
+| **Maintenance burden**         | Three separate observability systems to maintain (OTel + StatsD + Journal).                                     |
+| **Risk**                       | Lowest — additive change, no existing systems disturbed.                                                        |
+
+---
+
+## Slide 4: Future Adoption — Metrics & Logs via OTel
+
+### Scenario B: + OTel Metrics (Replace StatsD)
+
+> Migrate StatsD to OTel Metrics API, exposing Prometheus-compatible metrics. Remove Beast Insight.
+
+```mermaid
+flowchart LR
+    subgraph rippled["rippled Process"]
+        direction TB
+        OTel["OTel SDK<br/>(Traces + Metrics)"]
+        Journal["Journal + PerfLog<br/>(Logging)"]
+    end
+
+    OTel -->|"OTLP"| Collector["OTel Collector"]
+    Journal -->|"File I/O"| LogFile["perf.log / debug.log"]
+
+    Collector --> Tempo["Tempo<br/>(Traces)"]
+    Collector --> Prom["Prometheus<br/>(Metrics)"]
+    LogFile --> Loki["Loki (optional)"]
+
+    style rippled fill:#424242,stroke:#212121,color:#fff
+    style OTel fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style Journal fill:#e65100,stroke:#bf360c,color:#fff
+    style Collector fill:#2e7d32,stroke:#1b5e20,color:#fff
+```
+
+- **Better metrics?** Yes — Prometheus gives native histograms (p50/p95/p99), multi-dimensional labels, and exemplars linking metric spikes to traces.
+- **Codebase**: Remove `Beast::Insight` + `StatsDCollector` (~2000 LOC). Single SDK for traces and metrics.
+- **Operator effort**: Rewrite dashboards from StatsD/Graphite queries to PromQL. Run both in parallel during transition.
+- **Risk**: Medium — operators must migrate monitoring infrastructure.
+
+### Scenario C: + OTel Logs (Full Stack)
+
+> Also replace Journal logging with OTel Logs API. Single SDK for everything.
+
+```mermaid
+flowchart LR
+    subgraph rippled["rippled Process"]
+        OTel["OTel SDK<br/>(Traces + Metrics + Logs)"]
+    end
+
+    OTel -->|"OTLP"| Collector["OTel Collector"]
+
+    Collector --> Tempo["Tempo<br/>(Traces)"]
+    Collector --> Prom["Prometheus<br/>(Metrics)"]
+    Collector --> Loki["Loki / Elastic<br/>(Logs)"]
+
+    style rippled fill:#424242,stroke:#212121,color:#fff
+    style OTel fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style Collector fill:#2e7d32,stroke:#1b5e20,color:#fff
+```
+
+- **Structured logging**: OTel Logs API outputs structured records with `trace_id`, `span_id`, severity, and attributes by design.
+- **Full correlation**: Every log line carries `trace_id`. Click trace → see logs. Click metric spike → see trace → see logs.
+- **Codebase**: Remove Beast Insight (~2000 LOC) + simplify Journal/PerfLog (~3000 LOC). One dependency instead of three.
+- **Risk**: Highest — `beast::Journal` is deeply embedded in every component. Large refactor. OTel C++ Logs API is newer (stable since v1.11, less battle-tested).
+
+### Recommendation
+
+```mermaid
+flowchart LR
+    A["Phase 1<br/><b>Traces Only</b><br/>(Current Plan)"] --> B["Phase 2<br/><b>+ Metrics</b><br/>(Replace StatsD)"] --> C["Phase 3<br/><b>+ Logs</b><br/>(Full OTel)"]
+
+    style A fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style B fill:#1565c0,stroke:#0d47a1,color:#fff
+    style C fill:#e65100,stroke:#bf360c,color:#fff
+```
+
+| Phase                | Signal    | Strategy                                                       | Risk   |
+| -------------------- | --------- | -------------------------------------------------------------- | ------ |
+| **Phase 1** (now)    | Traces    | Add OTel traces. Keep StatsD and Journal. Prove value.         | Low    |
+| **Phase 2** (future) | + Metrics | Migrate StatsD → Prometheus via OTel. Remove Beast Insight.    | Medium |
+| **Phase 3** (future) | + Logs    | Adopt OTel Logs API. Align with structured logging initiative. | High   |
+
+> **Key Takeaway**: Start with traces (unique value, lowest risk), then incrementally adopt metrics and logs as the OTel infrastructure proves itself.
+
+---
+
+## Slide 5: Comparison with rippled's Existing Solutions
+
+### Current Observability Stack
+
+| Aspect                | PerfLog (JSON)        | StatsD (Metrics)      | OpenTelemetry (NEW)         |
+| --------------------- | --------------------- | --------------------- | --------------------------- |
+| **Type**              | Logging               | Metrics               | Distributed Tracing         |
+| **Scope**             | Single node           | Single node           | **Cross-node**              |
+| **Data**              | JSON log entries      | Counters, gauges      | Spans with context          |
+| **Correlation**       | By timestamp          | By metric name        | By `trace_id`               |
+| **Overhead**          | Low (file I/O)        | Low (UDP)             | Low-Medium (configurable)   |
+| **Question Answered** | "What happened here?" | "How many? How fast?" | **"What was the journey?"** |
+
+### Use Case Matrix
+
+| Scenario                         | PerfLog | StatsD | OpenTelemetry |
+| -------------------------------- | ------- | ------ | ------------- |
+| "How many TXs per second?"       | ❌      | ✅     | ❌            |
+| "Why was this specific TX slow?" | ⚠️      | ❌     | ✅            |
+| "Which node delayed consensus?"  | ❌      | ❌     | ✅            |
+| "Show TX journey across 5 nodes" | ❌      | ❌     | ✅            |
+
+> **Key Insight**: In the **traces-only** approach (Phase 1), OpenTelemetry **complements** existing systems. In future phases, OTel metrics and logs could **replace** StatsD and Journal respectively — see Slides 3-4 for the full adoption roadmap.
+
+---
+
+## Slide 6: Architecture
+
+> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
+
+### High-Level Integration Architecture
+
+```mermaid
+flowchart TB
+    subgraph rippled["rippled Node"]
+        subgraph services["Core Services"]
+            direction LR
+            RPC["RPC Server<br/>(HTTP/WS)"] ~~~ Overlay["Overlay<br/>(P2P Network)"] ~~~ Consensus["Consensus<br/>(RCLConsensus)"]
+        end
+
+        Telemetry["Telemetry Module<br/>(OpenTelemetry SDK)"]
+
+        services --> Telemetry
+    end
+
+    Telemetry -->|OTLP/gRPC| Collector["OTel Collector"]
+
+    Collector --> Tempo["Grafana Tempo"]
+    Collector --> Elastic["Elastic APM"]
+
+    style rippled fill:#424242,stroke:#212121,color:#fff
+    style services fill:#1565c0,stroke:#0d47a1,color:#fff
+    style Telemetry fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style Collector fill:#e65100,stroke:#bf360c,color:#fff
+```
+
+**Reading the diagram:**
+
+- **Core Services (blue, top)**: RPC Server, Overlay, and Consensus are the three primary components that generate trace data — they represent the entry points for client requests, peer messages, and consensus rounds respectively.
+- **Telemetry Module (green, middle)**: The OpenTelemetry SDK sits below the core services and receives span data from all three; it acts as a single collection point within the rippled process.
+- **OTel Collector (orange, center)**: An external process that receives spans over OTLP/gRPC from the Telemetry Module; it decouples rippled from backend choices and handles batching, sampling, and routing.
+- **Backends (bottom row)**: Tempo and Elastic APM are interchangeable — the Collector fans out to any combination, so operators can switch backends without modifying rippled code.
+- **Top-to-bottom flow**: Data flows from instrumented code down through the SDK, out over the network to the Collector, and finally into storage/visualization backends.
+
+### Context Propagation
+
+```mermaid
+sequenceDiagram
+    participant Client
+    participant NodeA as Node A
+    participant NodeB as Node B
+
+    Client->>NodeA: Submit TX (no context)
+    Note over NodeA: Creates trace_id: abc123<br/>span: tx.receive
+    NodeA->>NodeB: Relay TX<br/>(traceparent: abc123)
+    Note over NodeB: Links to trace_id: abc123<br/>span: tx.relay
+```
+
+- **HTTP/RPC**: W3C Trace Context headers (`traceparent`)
+- **P2P Messages**: Protocol Buffer extension fields
+
+---
+
+## Slide 7: Implementation Plan
+
+### 5-Phase Rollout (9 Weeks)
+
+> **Note**: Dates shown are relative to project start, not calendar dates.
+
+```mermaid
+gantt
+    title Implementation Timeline
+    dateFormat  YYYY-MM-DD
+    axisFormat  Week %W
+
+    section Phase 1
+    Core Infrastructure    :p1, 2024-01-01, 2w
+
+    section Phase 2
+    RPC Tracing           :p2, after p1, 2w
+
+    section Phase 3
+    Transaction Tracing   :p3, after p2, 2w
+
+    section Phase 4
+    Consensus Tracing     :p4, after p3, 2w
+
+    section Phase 5
+    Documentation         :p5, after p4, 1w
+```
+
+### Phase Details
+
+| Phase | Focus               | Key Deliverables                             | Effort  |
+| ----- | ------------------- | -------------------------------------------- | ------- |
+| 1     | Core Infrastructure | SDK integration, Telemetry interface, Config | 10 days |
+| 2     | RPC Tracing         | HTTP context extraction, Handler spans       | 10 days |
+| 3     | Transaction Tracing | Protobuf context, P2P relay propagation      | 10 days |
+| 4     | Consensus Tracing   | Round spans, Proposal/validation tracing     | 10 days |
+| 5     | Documentation       | Runbook, Dashboards, Training                | 7 days  |
+
+**Total Effort**: ~47 developer-days (2 developers)
+
+> **Future Phases** (not in current scope): After traces are stable, OTel metrics can replace StatsD (~3 weeks), and OTel logs can replace Journal (~4 weeks, aligned with structured logging initiative). See Slides 3-4 for the full adoption roadmap.
+
+---
+
+## Slide 8: Performance Overhead
+
+> **OTLP** = OpenTelemetry Protocol
+
+### Estimated System Impact
+
+| Metric            | Overhead   | Notes                                            |
+| ----------------- | ---------- | ------------------------------------------------ |
+| **CPU**           | 1-3%       | Span creation and attribute setting              |
+| **Memory**        | ~10 MB     | SDK statics + batch buffer + worker thread stack |
+| **Network**       | 10-50 KB/s | Compressed OTLP export to collector              |
+| **Latency (p99)** | <2%        | With proper sampling configuration               |
+
+#### How We Arrived at These Numbers
+
+**Assumptions (XRPL mainnet baseline)**:
+
+| Parameter                 | Value                  | Source                                                                                              |
+| ------------------------- | ---------------------- | --------------------------------------------------------------------------------------------------- |
+| Transaction throughput    | ~25 TPS (peaks to ~50) | Mainnet average                                                                                     |
+| Default peers per node    | 21                     | `peerfinder/detail/Tuning.h` (`defaultMaxPeers`)                                                    |
+| Consensus round frequency | ~1 round / 3-4 seconds | `ConsensusParms.h` (`ledgerMIN_CONSENSUS=1950ms`)                                                   |
+| Proposers per round       | ~20-35                 | Mainnet UNL size                                                                                    |
+| P2P message rate          | ~160 msgs/sec          | See message breakdown below                                                                         |
+| Avg TX processing time    | ~200 μs                | Profiled baseline                                                                                   |
+| Single span creation cost | 500-1000 ns            | OTel C++ SDK benchmarks (see [3.5.4](./03-implementation-strategy.md#354-performance-data-sources)) |
+
+**P2P message breakdown** (per node, mainnet):
+
+| Message Type  | Rate         | Derivation                                                            |
+| ------------- | ------------ | --------------------------------------------------------------------- |
+| TMTransaction | ~100/sec     | ~25 TPS × ~4 relay hops per TX, deduplicated by HashRouter            |
+| TMValidation  | ~50/sec      | ~35 validators × ~1 validation/3s round ≈ ~12/sec, plus relay fan-out |
+| TMProposeSet  | ~10/sec      | ~35 proposers / 3s round ≈ ~12/sec, clustered in establish phase      |
+| **Total**     | **~160/sec** | **Only traced message types counted**                                 |
+
+**CPU (1-3%) — Calculation**:
+
+Per-transaction tracing cost breakdown:
+
+| Operation                                       | Cost        | Notes                                      |
+| ----------------------------------------------- | ----------- | ------------------------------------------ |
+| `tx.receive` span (create + end + 4 attributes) | ~1400 ns    | ~1000ns create + ~200ns end + 4×50ns attrs |
+| `tx.validate` span                              | ~1200 ns    | ~1000ns create + ~200ns for 2 attributes   |
+| `tx.relay` span                                 | ~1200 ns    | ~1000ns create + ~200ns for 2 attributes   |
+| Context injection into P2P message              | ~200 ns     | Serialize trace_id + span_id into protobuf |
+| **Total per TX**                                | **~4.0 μs** |                                            |
+
+> **CPU overhead**: 4.0 μs / 200 μs baseline = **~2.0% per transaction**. Under high load with consensus + RPC spans overlapping, reaches ~3%. Consensus itself adds only ~36 μs per 3-second round (~0.001%), so the TX path dominates. On production server hardware (3+ GHz Xeon), span creation drops to ~500-600 ns, bringing per-TX cost to ~2.6 μs (~1.3%). See [Section 3.5.4](./03-implementation-strategy.md#354-performance-data-sources) for benchmark sources.
+
+**Memory (~10 MB) — Calculation**:
+
+| Component                                     | Size               | Notes                                 |
+| --------------------------------------------- | ------------------ | ------------------------------------- |
+| TracerProvider + Exporter (gRPC channel init) | ~320 KB            | Allocated once at startup             |
+| BatchSpanProcessor (circular buffer)          | ~16 KB             | 2049 × 8-byte AtomicUniquePtr entries |
+| BatchSpanProcessor (worker thread stack)      | ~8 MB              | Default Linux thread stack size       |
+| Active spans (in-flight, max ~1000)           | ~500-800 KB        | ~500-800 bytes/span × 1000 concurrent |
+| Export queue (batch buffer, max 2048 spans)   | ~1 MB              | ~500 bytes/span × 2048 queue depth    |
+| Thread-local context storage (~100 threads)   | ~6.4 KB            | ~64 bytes/thread                      |
+| **Total**                                     | **~10 MB ceiling** |                                       |
+
+> Memory plateaus once the export queue fills — the `max_queue_size=2048` config bounds growth.
+> The worker thread stack (~8 MB) dominates the static footprint but is virtual memory; actual RSS
+> depends on stack usage (typically much less). Active spans are larger than originally estimated
+> (~500-800 bytes) because the OTel SDK `Span` object includes a mutex (~40 bytes), `SpanData`
+> recordable (~250 bytes base), and `std::map`-based attribute storage (~200-500 bytes for 3-5
+> string attributes). See [Section 3.5.4](./03-implementation-strategy.md#354-performance-data-sources) for source references.
+
+**Network (10-50 KB/s) — Calculation**:
+
+Two sources of network overhead:
+
+**(A) OTLP span export to Collector:**
+
+| Sampling Rate              | Effective Spans/sec | Avg Span Size (compressed) | Bandwidth    |
+| -------------------------- | ------------------- | -------------------------- | ------------ |
+| 100% (dev only)            | ~500                | ~500 bytes                 | ~250 KB/s    |
+| **10% (recommended prod)** | **~50**             | **~500 bytes**             | **~25 KB/s** |
+| 1% (minimal)               | ~5                  | ~500 bytes                 | ~2.5 KB/s    |
+
+> The ~500 spans/sec at 100% is a rounded-up planning figure: ~100 TX spans + ~160 P2P context spans + ~23 consensus spans/round + ~50 RPC spans ≈ ~330/sec at average load, with headroom for TPS peaks. OTLP protobuf with gzip compression yields ~500 bytes/span average.
+
+**(B) P2P trace context overhead** (added to existing messages, always-on regardless of sampling):
+
+| Message Type  | Rate     | Context Size | Bandwidth     |
+| ------------- | -------- | ------------ | ------------- |
+| TMTransaction | ~100/sec | 29 bytes     | ~2.9 KB/s     |
+| TMValidation  | ~50/sec  | 29 bytes     | ~1.5 KB/s     |
+| TMProposeSet  | ~10/sec  | 29 bytes     | ~0.3 KB/s     |
+| **Total P2P** |          |              | **~4.7 KB/s** |
+
+> **Combined**: 25 KB/s (OTLP export at 10%) + 5 KB/s (P2P context) ≈ **~30 KB/s typical**. The 10-50 KB/s range covers 10-20% sampling under normal to peak mainnet load.
+
+**Latency (<2%) — Calculation**:
+
+| Path                           | Tracing Cost | Baseline | Overhead |
+| ------------------------------ | ------------ | -------- | -------- |
+| Fast RPC (e.g., `server_info`) | 2.75 μs      | ~1 ms    | 0.275%   |
+| Slow RPC (e.g., `path_find`)   | 2.75 μs      | ~100 ms  | 0.003%   |
+| Transaction processing         | 4.0 μs       | ~200 μs  | 2.0%     |
+| Consensus round                | 36 μs        | ~3 sec   | 0.001%   |
+
+> At p99, even the worst case (TX processing at ~2.0%) sits at the stated ~2% latency bound and within the 1-3% CPU range. RPC and consensus overhead are negligible. On production hardware, TX overhead drops to ~1.3%.
+
+### Per-Message Overhead (Context Propagation)
+
+Each P2P message carries trace context with the following overhead:
+
+| Field         | Size          | Description                               |
+| ------------- | ------------- | ----------------------------------------- |
+| `trace_id`    | 16 bytes      | Unique identifier for the entire trace    |
+| `span_id`     | 8 bytes       | Current span (becomes parent on receiver) |
+| `trace_flags` | 1 byte        | Sampling decision flags                   |
+| `trace_state` | 0-4 bytes     | Optional vendor-specific data             |
+| **Total**     | **~29 bytes** | **Added per traced P2P message**          |
+
+```mermaid
+flowchart LR
+    subgraph msg["P2P Message with Trace Context"]
+        A["Original Message<br/>(variable size)"] --> B["+ TraceContext<br/>(~29 bytes)"]
+    end
+
+    subgraph breakdown["Context Breakdown"]
+        C["trace_id<br/>16 bytes"]
+        D["span_id<br/>8 bytes"]
+        E["flags<br/>1 byte"]
+        F["state<br/>0-4 bytes"]
+    end
+
+    B --> breakdown
+
+    style A fill:#424242,stroke:#212121,color:#fff
+    style B fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style C fill:#1565c0,stroke:#0d47a1,color:#fff
+    style D fill:#1565c0,stroke:#0d47a1,color:#fff
+    style E fill:#e65100,stroke:#bf360c,color:#fff
+    style F fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+**Reading the diagram:**
+
+- **Original Message (gray, left)**: The existing P2P message payload of variable size — this is unchanged; trace context is appended, never modifying the original data.
+- **+ TraceContext (green, right of message)**: The additional 29-byte context block attached to each traced message; the arrow from the original message shows it is a pure addition.
+- **Context Breakdown (right subgraph)**: The four fields — `trace_id` (16 bytes), `span_id` (8 bytes), `flags` (1 byte), and `state` (0-4 bytes) — show exactly what is added and their individual sizes.
+- **Color coding**: Blue fields (`trace_id`, `span_id`) are the core identifiers required for trace correlation; orange (`flags`) controls sampling decisions; purple (`state`) is optional vendor data typically omitted.
+
+> **Note**: 29 bytes represents ~0.6-6% overhead depending on message size (~6% for a 500B simple TX, ~0.6% for a 5KB proposal), which is acceptable for the observability benefits provided.
+
+### Mitigation Strategies
+
+```mermaid
+flowchart LR
+    A["Head Sampling<br/>10% default"] --> B["Tail Sampling<br/>Keep errors/slow"] --> C["Batch Export<br/>Reduce I/O"] --> D["Conditional Compile<br/>XRPL_ENABLE_TELEMETRY"]
+
+    style A fill:#1565c0,stroke:#0d47a1,color:#fff
+    style B fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style C fill:#e65100,stroke:#bf360c,color:#fff
+    style D fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+> For a detailed explanation of head vs. tail sampling, see Slide 9.
+
+### Kill Switches (Rollback Options)
+
+1. **Config Disable**: Set `enabled=0` in config → instant disable, no restart needed for sampling
+2. **Rebuild**: Compile with `XRPL_ENABLE_TELEMETRY=OFF` → zero overhead (no-op)
+3. **Full Revert**: Clean separation allows easy commit reversion
+
+---
+
+## Slide 9: Sampling Strategies — Head vs. Tail
+
+> Sampling controls **which traces are recorded and exported**. Without sampling, every operation generates a trace — at 500+ spans/sec, this overwhelms storage and network. Sampling lets you keep the signal, discard the noise.
+
+### Head Sampling (Decision at Start)
+
+The sampling decision is made **when a trace begins**, before any work is done. A random number is generated; if it falls within the configured ratio, the entire trace is recorded. Otherwise, the trace is silently dropped.
+
+```mermaid
+flowchart LR
+    A["New Request<br/>Arrives"] --> B{"Random < 10%?"}
+    B -->|"Yes (1 in 10)"| C["Record Entire Trace<br/>(all spans)"]
+    B -->|"No (9 in 10)"| D["Drop Entire Trace<br/>(zero overhead)"]
+
+    style C fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style D fill:#c62828,stroke:#8c2809,color:#fff
+    style B fill:#1565c0,stroke:#0d47a1,color:#fff
+```
+
+| Aspect                        | Details                                                                                                                                                                                                  |
+| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **Where it runs**             | Inside rippled (SDK-level). Configured via `sampling_ratio` in `rippled.cfg`.                                                                                                                            |
+| **When the decision happens** | At trace creation time — before the first span is even populated.                                                                                                                                        |
+| **How it works**              | `sampling_ratio=0.1` means each trace has a 10% probability of being recorded. Dropped traces incur near-zero overhead (no spans created, no attributes set, no export).                                 |
+| **Propagation**               | Once a trace is sampled, the `trace_flags` field (1 byte in the context header) tells downstream nodes to also sample it. Unsampled traces propagate `trace_flags=0`, so downstream nodes skip them too. |
+| **Pros**                      | Lowest overhead. Simple to configure. Predictable resource usage.                                                                                                                                        |
+| **Cons**                      | **Blind** — it doesn't know if the trace will be interesting. A rare error or slow consensus round has only a 10% chance of being captured.                                                              |
+| **Best for**                  | High-volume, steady-state traffic where most traces look similar (e.g., routine RPC requests).                                                                                                           |
+
+**rippled configuration**:
+
+```ini
+[telemetry]
+# Record 10% of traces (recommended for production)
+sampling_ratio=0.1
+```
+
+### Tail Sampling (Decision at End)
+
+The sampling decision is made **after the trace completes**, based on its actual content — was it slow? Did it error? Was it a consensus round? This requires buffering complete traces before deciding.
+
+```mermaid
+flowchart TB
+    A["All Traces<br/>Buffered (100%)"] --> B["OTel Collector<br/>Evaluates Rules"]
+
+    B --> C{"Error?"}
+    C -->|Yes| K["KEEP"]
+
+    C -->|No| D{"Slow?<br/>(>5s consensus,<br/>>1s RPC)"}
+    D -->|Yes| K
+
+    D -->|No| E{"Random < 10%?"}
+    E -->|Yes| K
+    E -->|No| F["DROP"]
+
+    style K fill:#2e7d32,stroke:#1b5e20,color:#fff
+    style F fill:#c62828,stroke:#8c2809,color:#fff
+    style B fill:#1565c0,stroke:#0d47a1,color:#fff
+    style C fill:#e65100,stroke:#bf360c,color:#fff
+    style D fill:#e65100,stroke:#bf360c,color:#fff
+    style E fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+| Aspect                        | Details                                                                                                                                                                                                   |
+| ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **Where it runs**             | In the **OTel Collector** (external process), not inside rippled. rippled exports 100% of traces; the Collector decides what to keep.                                                                     |
+| **When the decision happens** | After the Collector has received all spans for a trace (waits `decision_wait=10s` for stragglers).                                                                                                        |
+| **How it works**              | Policy rules evaluate the completed trace: keep all errors, keep slow operations above a threshold (>5s consensus, >1s RPC), then probabilistically sample the rest at 10%.                               |
+| **Pros**                      | **Never misses important traces**. Errors, slow requests, and consensus anomalies are always captured regardless of probability.                                                                          |
+| **Cons**                      | Higher resource usage — rippled must export 100% of spans to the Collector, which buffers them in memory before deciding. The Collector needs more RAM (configured via `num_traces` and `decision_wait`). |
+| **Best for**                  | Production troubleshooting where you can't afford to miss errors or anomalies.                                                                                                                            |
+
+**Collector configuration** (tail sampling rules for rippled):
+
+```yaml
+processors:
+  tail_sampling:
+    decision_wait: 10s # Wait for all spans in a trace
+    num_traces: 100000 # Buffer up to 100K concurrent traces
+    policies:
+      - name: errors # Always keep error traces
+        type: status_code
+        status_code: { status_codes: [ERROR] }
+
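+      - name: slow-consensus # Intended: consensus rounds >5s (wrap in an `and` policy to scope it to consensus traces)
+        type: latency
+        latency: { threshold_ms: 5000 }
+
+      - name: slow-rpc # Intended: RPC requests >1s; left unscoped, this keeps ANY trace slower than 1s
+        type: latency
+        latency: { threshold_ms: 1000 }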
+
+      - name: probabilistic # Sample 10% of everything else
+        type: probabilistic
+        probabilistic: { sampling_percentage: 10 }
+```
+
+### Head vs. Tail — Side-by-Side
+
+|                               | Head Sampling                            | Tail Sampling                                    |
+| ----------------------------- | ---------------------------------------- | ------------------------------------------------ |
+| **Decision point**            | Trace start (inside rippled)             | Trace end (in OTel Collector)                    |
+| **Knows trace content?**      | No (random coin flip)                    | Yes (evaluates completed trace)                  |
+| **Overhead on rippled**       | Lowest (dropped traces = no-op)          | Higher (must export 100% to Collector)           |
+| **Collector resource usage**  | Low (receives only sampled traces)       | Higher (buffers all traces before deciding)      |
+| **Captures all errors?**      | No (only if trace was randomly selected) | **Yes** (error policy catches them)              |
+| **Captures slow operations?** | No (random)                              | **Yes** (latency policy catches them)            |
+| **Configuration**             | `rippled.cfg`: `sampling_ratio=0.1`      | `otel-collector.yaml`: `tail_sampling` processor |
+| **Best for**                  | High-throughput steady-state             | Troubleshooting & anomaly detection              |
+
+### Recommended Strategy for rippled
+
+Use **both** in a layered approach:
+
+```mermaid
+flowchart LR
+    subgraph rippled["rippled (Head Sampling)"]
+        HS["sampling_ratio=1.0<br/>(export everything)"]
+    end
+
+    subgraph collector["OTel Collector (Tail Sampling)"]
+        TS["Keep: errors + slow + 10% random<br/>Drop: routine traces"]
+    end
+
+    subgraph storage["Backend Storage"]
+        ST["Only interesting traces<br/>stored long-term"]
+    end
+
+    rippled -->|"100% of spans"| collector -->|"~15-20% kept"| storage
+
+    style rippled fill:#424242,stroke:#212121,color:#fff
+    style collector fill:#1565c0,stroke:#0d47a1,color:#fff
+    style storage fill:#2e7d32,stroke:#1b5e20,color:#fff
+```
+
+> **Why this works**: rippled exports everything (no blind drops), the Collector applies intelligent filtering (keep errors/slow/anomalies, sample the rest), and only ~15-20% of traces reach storage. If Collector resource usage becomes a concern, add head sampling at `sampling_ratio=0.5` to halve the export volume while still giving the Collector enough data for good tail-sampling decisions.
+
+---
+
+## Slide 10: Data Collection & Privacy
+
+### What Data is Collected
+
+| Category        | Attributes Collected                                                                 | Purpose                     |
+| --------------- | ------------------------------------------------------------------------------------ | --------------------------- |
+| **Transaction** | `tx.hash`, `tx.type`, `tx.result`, `tx.fee`, `ledger_index`                          | Trace transaction lifecycle |
+| **Consensus**   | `round`, `phase`, `mode`, `proposers` (count of proposing validators), `duration_ms` | Analyze consensus timing    |
+| **RPC**         | `command`, `version`, `status`, `duration_ms`                                        | Monitor RPC performance     |
+| **Peer**        | `peer.id` (public key), `latency_ms`, `message.type`, `message.size`                 | Network topology analysis   |
+| **Ledger**      | `ledger.hash`, `ledger.index`, `close_time`, `tx_count`                              | Ledger progression tracking |
+| **Job**         | `job.type`, `queue_ms`, `worker`                                                     | JobQueue performance        |
+
+### What is NOT Collected (Privacy Guarantees)
+
+```mermaid
+flowchart LR
+    subgraph notCollected["❌ NOT Collected"]
+        direction LR
+        A["Private Keys"] ~~~ B["Account Balances"] ~~~ C["Transaction Amounts"]
+    end
+
+    subgraph alsoNot["❌ Also Excluded"]
+        direction LR
+        D["IP Addresses<br/>(configurable)"] ~~~ E["Personal Data"] ~~~ F["Raw TX Payloads"]
+    end
+
+    style A fill:#c62828,stroke:#8c2809,color:#fff
+    style B fill:#c62828,stroke:#8c2809,color:#fff
+    style C fill:#c62828,stroke:#8c2809,color:#fff
+    style D fill:#c62828,stroke:#8c2809,color:#fff
+    style E fill:#c62828,stroke:#8c2809,color:#fff
+    style F fill:#c62828,stroke:#8c2809,color:#fff
+```
+
+**Reading the diagram:**
+
+- **NOT Collected (top row, red)**: Private Keys, Account Balances, and Transaction Amounts are explicitly excluded — these are financial/security-sensitive fields that telemetry never touches.
+- **Also Excluded (bottom row, red)**: IP Addresses (configurable per deployment), Personal Data, and Raw TX Payloads are also excluded — these protect operator and user privacy.
+- **All-red styling**: Every box is styled in red to visually reinforce that these are exclusions — apart from IP addresses, whose collection is configurable per deployment, the telemetry system has no code path to collect any of these fields.
+- **Two-row layout**: The split between "NOT Collected" and "Also Excluded" distinguishes between financial data (top) and operational/personal data (bottom), making the privacy boundaries clear to auditors.
+
+### Privacy Protection Mechanisms
+
+| Mechanism                  | Description                                                   |
+| -------------------------- | ------------------------------------------------------------- |
+| **Account Hashing**        | `xrpl.tx.account` is hashed at collector level before storage |
+| **Configurable Redaction** | Sensitive fields can be excluded via config                   |
+| **Sampling**               | Lowering `sampling_ratio` (default 1.0) reduces exposure      |
+| **Local Control**          | Node operators control what gets exported                     |
+| **No Raw Payloads**        | Transaction content is never recorded, only metadata          |
+
+> **Key Principle**: Telemetry collects **operational metadata** (timing, counts, hashes) — never **sensitive content** (keys, balances, amounts).
+
+---
+
+_End of Presentation_
--- a/cfg/xrpld-example.cfg
+++ b/cfg/xrpld-example.cfg
@@ -1529,3 +1529,46 @@ validators.txt
 # set to ssl_verify to 0.
 [ssl_verify]
 1
+#-------------------------------------------------------------------------------
+#
+# 11. Telemetry (OpenTelemetry Tracing)
+#
+#-------------------------------------------------------------------------------
+#
+# Enables distributed tracing via OpenTelemetry. Requires building with
+# the telemetry Conan option (-o telemetry=True), which sets telemetry=ON in CMake.
+#
+# [telemetry]
+#
+# enabled=0
+#
+#   Enable or disable telemetry at runtime. Default: 0 (disabled).
+#
+# endpoint=http://localhost:4318/v1/traces
+#
+#   The OpenTelemetry Collector endpoint (OTLP/HTTP). Default: http://localhost:4318/v1/traces.
+#
+# exporter=otlp_http
+#
+#   Exporter type: otlp_http. Default: otlp_http.
+#
+# sampling_ratio=1.0
+#
+#   Fraction of traces to sample (0.0 to 1.0). Default: 1.0 (all traces).
+#
+# trace_rpc=1
+#
+#   Enable RPC request tracing. Default: 1.
+#
+# trace_transactions=1
+#
+#   Enable transaction lifecycle tracing. Default: 1.
+#
+# trace_consensus=1
+#
+#   Enable consensus round tracing. Default: 1.
+#
+# trace_peer=0
+#
+#   Enable peer message tracing (high volume). Default: 0.
+#
--- a/cmake/XrplCore.cmake
+++ b/cmake/XrplCore.cmake
@@ -204,6 +204,23 @@ target_link_libraries(
 add_module(xrpl tx)
 target_link_libraries(xrpl.libxrpl.tx PUBLIC xrpl.libxrpl.ledger)

+# Telemetry module — OpenTelemetry distributed tracing support.
+# Sources: include/xrpl/telemetry/ (headers), src/libxrpl/telemetry/ (impl).
+# When telemetry=ON, links the Conan-provided umbrella target
+# opentelemetry-cpp::opentelemetry-cpp (individual component targets like
+# ::api, ::sdk are not available in the Conan package).
+add_module(xrpl telemetry)
+target_link_libraries(
+    xrpl.libxrpl.telemetry
+    PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast
+)
+if(telemetry)
+    target_link_libraries(
+        xrpl.libxrpl.telemetry
+        PUBLIC opentelemetry-cpp::opentelemetry-cpp
+    )
+endif()
+
 add_library(xrpl.libxrpl)
 set_target_properties(xrpl.libxrpl PROPERTIES OUTPUT_NAME xrpl)

@@ -235,6 +252,7 @@ target_link_modules(
    resource
    server
    shamap
+    telemetry
    tx
 )

--- a/conan.lock
+++ b/conan.lock
@@ -10,10 +10,13 @@
        "rocksdb/10.5.1#4a197eca381a3e5ae8adf8cffa5aacd0%1765850186.86",
        "re2/20230301#ca3b241baec15bd31ea9187150e0b333%1765850148.103",
        "protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038",
-        "openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1769599205.414",
+        "opentelemetry-cpp/1.18.0#efd9851e173f8a13b9c7d35232de8cf1%1750409186.472",
+        "openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1770229825.601",
        "nudb/2.0.9#0432758a24204da08fee953ec9ea03cb%1769436073.32",
+        "nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1701220705.259",
        "lz4/1.10.0#59fc63cac7f10fbe8e05c7e62c2f3504%1765850143.914",
        "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1765842973.492",
+        "libcurl/8.18.0#364bc3755cb9ef84ed9a7ae9c7efc1c1%1770984390.024",
        "libbacktrace/cci.20210118#a7691bfccd8caaf66309df196790a5a1%1765842973.03",
        "libarchive/3.8.1#ffee18995c706e02bf96e7a2f7042e0d%1765850144.736",
        "jemalloc/5.3.0#e951da9cf599e956cebc117880d2d9f8%1729241615.244",
@@ -30,9 +33,15 @@
        "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1765850150.075",
        "strawberryperl/5.32.1.1#707032463aa0620fa17ec0d887f5fe41%1765850165.196",
        "protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038",
+        "pkgconf/2.5.1#93c2051284cba1279494a43a4fcfeae2%1757684701.089",
+        "opentelemetry-proto/1.4.0#4096a3b05916675ef9628f3ffd571f51%1732731336.11",
+        "ninja/1.13.2#c8c5dc2a52ed6e4e42a66d75b4717ceb%1764096931.974",
        "nasm/2.16.01#31e26f2ee3c4346ecd347911bd126904%1765850144.707",
        "msys2/cci.latest#eea83308ad7e9023f7318c60d5a9e6cb%1770199879.083",
+        "meson/1.10.0#60786758ea978964c24525de19603cf4%1768294926.103",
        "m4/1.4.19#70dc8bbb33e981d119d2acc0175cf381%1763158052.846",
+        "libtool/2.4.7#14e7739cc128bc1623d2ed318008e47e%1755679003.847",
+        "gnu-config/cci.20210814#466e9d4d7779e1c142443f7ea44b4284%1762363589.329",
        "cmake/4.2.0#ae0a44f44a1ef9ab68fd4b3e9a1f8671%1765850153.937",
        "cmake/3.31.10#313d16a1aa16bbdb2ca0792467214b76%1765850153.479",
        "b2/5.3.3#107c15377719889654eb9a162a673975%1765850144.355",
--- a/conanfile.py
+++ b/conanfile.py
@@ -23,6 +23,7 @@ class Xrpl(ConanFile):
        "rocksdb": [True, False],
        "shared": [True, False],
        "static": [True, False],
+        "telemetry": [True, False],
        "tests": [True, False],
        "unity": [True, False],
        "xrpld": [True, False],
@@ -55,6 +56,7 @@ class Xrpl(ConanFile):
        "rocksdb": True,
        "shared": False,
        "static": True,
+        "telemetry": True,
        "tests": False,
        "unity": False,
        "xrpld": False,
@@ -150,6 +152,10 @@ class Xrpl(ConanFile):
            self.requires("jemalloc/5.3.0")
        if self.options.rocksdb:
            self.requires("rocksdb/10.5.1")
+        # OpenTelemetry C++ SDK for distributed tracing (optional).
+        # Provides OTLP/HTTP exporter, batch span processor, and trace API.
+        if self.options.telemetry:
+            self.requires("opentelemetry-cpp/1.18.0")
        self.requires("xxhash/0.8.3", **transitive_headers_opt)

    exports_sources = (
@@ -178,6 +184,7 @@ class Xrpl(ConanFile):
        tc.variables["rocksdb"] = self.options.rocksdb
        tc.variables["BUILD_SHARED_LIBS"] = self.options.shared
        tc.variables["static"] = self.options.static
+        tc.variables["telemetry"] = self.options.telemetry
        tc.variables["unity"] = self.options.unity
        tc.variables["xrpld"] = self.options.xrpld
        tc.generate()
@@ -230,3 +237,5 @@ class Xrpl(ConanFile):
        ]
        if self.options.rocksdb:
            libxrpl.requires.append("rocksdb::librocksdb")
+        if self.options.telemetry:
+            libxrpl.requires.append("opentelemetry-cpp::opentelemetry-cpp")
--- a/cspell.config.yaml
+++ b/cspell.config.yaml
@@ -87,6 +87,8 @@ words:
  - daria
  - dcmake
  - dearmor
+  - Dedup
+  - dedup
  - deleteme
  - demultiplexer
  - deserializaton
@@ -182,6 +184,7 @@ words:
  - NOLINTNEXTLINE
  - nonxrp
  - noripple
+  - nostd
  - nudb
  - nullptr
  - nunl
@@ -196,6 +199,7 @@ words:
  - permissioned
  - pointee
  - populator
+  - pratik
  - preauth
  - preauthorization
  - preauthorize
@@ -210,6 +214,7 @@ words:
  - qalloc
  - queuable
  - Raphson
+  - reparent
  - replayer
  - rerere
  - retriable
@@ -267,6 +272,7 @@ words:
  - txjson
  - txn
  - txns
+  - txqueue
  - txs
  - UBSAN
  - ubsan
@@ -313,3 +319,9 @@ words:
  - xrplf
  - xxhash
  - xxhasher
+  - xychart
+  - otelc
+  - zpages
+  - traceql
+  - Gantt
+  - gantt
--- a/docker/telemetry/docker-compose.yml
+++ b/docker/telemetry/docker-compose.yml
@@ -0,0 +1,80 @@
+# Docker Compose stack for rippled OpenTelemetry observability.
+#
+# Provides services for local development:
+#   - otel-collector: receives OTLP traces from rippled, batches and
+#     forwards them to Jaeger and Tempo. Listens on ports 4317 (gRPC)
+#     and 4318 (HTTP).
+#   - jaeger: all-in-one tracing backend with UI on port 16686.
+#   - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore
+#     on port 3000. Recommended for production (S3/GCS storage, TraceQL).
+#   - grafana: dashboards on port 3000, pre-configured with Jaeger and
+#     Tempo datasources.
+#
+# Usage:
+#   docker compose -f docker/telemetry/docker-compose.yml up -d
+#
+# Configure rippled to export traces by adding to xrpld.cfg:
+#   [telemetry]
+#   enabled=1
+#   endpoint=http://localhost:4318/v1/traces
+
+version: "3.8"
+
+services:
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:latest
+    command: ["--config=/etc/otel-collector-config.yaml"]
+    ports:
+      - "4317:4317" # OTLP gRPC
+      - "4318:4318" # OTLP HTTP
+      - "13133:13133" # Health check
+    volumes:
+      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
+    depends_on:
+      - jaeger
+      - tempo
+    networks:
+      - rippled-telemetry
+
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    environment:
+      - COLLECTOR_OTLP_ENABLED=true
+    ports:
+      - "16686:16686" # Jaeger UI
+      - "14250:14250" # gRPC
+    networks:
+      - rippled-telemetry
+
+  tempo:
+    image: grafana/tempo:2.7.2
+    command: ["-config.file=/etc/tempo.yaml"]
+    ports:
+      - "3200:3200" # Tempo HTTP API (health, query)
+    volumes:
+      - ./tempo.yaml:/etc/tempo.yaml:ro
+      - tempo-data:/var/tempo
+    networks:
+      - rippled-telemetry
+
+  grafana:
+    image: grafana/grafana:latest
+    environment:
+      - GF_AUTH_ANONYMOUS_ENABLED=true
+      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
+    ports:
+      - "3000:3000"
+    volumes:
+      - ./grafana/provisioning:/etc/grafana/provisioning:ro
+    depends_on:
+      - jaeger
+      - tempo
+    networks:
+      - rippled-telemetry
+
+volumes:
+  tempo-data:
+
+networks:
+  rippled-telemetry:
+    driver: bridge
--- a/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml
@@ -0,0 +1,12 @@
+# Grafana datasource provisioning for the rippled telemetry stack.
+# Auto-configures Jaeger as a trace data source on Grafana startup.
+# Access Grafana at http://localhost:3000, then use Explore -> Jaeger
+# to browse rippled traces.
+
+apiVersion: 1
+
+datasources:
+  - name: Jaeger
+    type: jaeger
+    access: proxy
+    url: http://jaeger:16686
--- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
+++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml
@@ -0,0 +1,147 @@
+# Grafana datasource provisioning for Grafana Tempo.
+# Auto-configures Tempo as a trace data source on Grafana startup.
+# Access Grafana at http://localhost:3000, then use Explore -> Tempo
+# to browse rippled traces using TraceQL.
+#
+# Search filters provide pre-configured dropdowns in the Explore UI.
+# Each phase adds filters for the span attributes it introduces.
+# Phase 1b (infra): Base filters — node identity, service, span name, status.
+# Phase 2 (RPC):    RPC command, status, role filters.
+# Phase 3 (TX):     Transaction hash, local/peer origin, status.
+# Phase 4 (Cons):   Consensus mode, round, ledger sequence, close time.
+
+apiVersion: 1
+
+datasources:
+  - name: Tempo
+    type: tempo
+    access: proxy
+    url: http://tempo:3200
+    uid: tempo
+    jsonData:
+      nodeGraph:
+        enabled: true
+      serviceMap:
+        datasourceUid: prometheus
+      tracesToMetrics:
+        datasourceUid: prometheus
+        spanStartTimeShift: "-1h"
+        spanEndTimeShift: "1h"
+      search:
+        filters:
+          # --- Node identification filters ---
+          # service.name: logical service name (default: "rippled").
+          #   Useful when running multiple service types in the same collector.
+          - id: service-name
+            tag: service.name
+            operator: "="
+            scope: resource
+            type: static
+          # service.instance.id: unique node identifier — defaults to the
+          #   node's public key (e.g., nHB1X37...). Distinguishes individual
+          #   nodes in a multi-node cluster or network.
+          - id: node-id
+            tag: service.instance.id
+            operator: "="
+            scope: resource
+            type: static
+          # service.version: rippled build version (e.g., "2.4.0-b1").
+          #   Filter traces from specific software releases.
+          - id: node-version
+            tag: service.version
+            operator: "="
+            scope: resource
+            type: dynamic
+          # xrpl.network.id: numeric network identifier
+          #   (0 = mainnet, 1 = testnet, 2 = devnet, etc.).
+          - id: network-id
+            tag: xrpl.network.id
+            operator: "="
+            scope: resource
+            type: dynamic
+          # xrpl.network.type: human-readable network name
+          #   ("mainnet", "testnet", "devnet", "standalone").
+          - id: network-type
+            tag: xrpl.network.type
+            operator: "="
+            scope: resource
+            type: static
+          # --- Span intrinsic filters ---
+          - id: span-name
+            tag: name
+            operator: "="
+            scope: intrinsic
+            type: static
+          - id: span-status
+            tag: status
+            operator: "="
+            scope: intrinsic
+            type: static
+          - id: span-duration
+            tag: duration
+            operator: ">"
+            scope: intrinsic
+            type: static
+          # Phase 2: RPC tracing filters
+          - id: rpc-command
+            tag: xrpl.rpc.command
+            operator: "="
+            scope: span
+            type: static
+          - id: rpc-status
+            tag: xrpl.rpc.status
+            operator: "="
+            scope: span
+            type: dynamic
+          - id: rpc-role
+            tag: xrpl.rpc.role
+            operator: "="
+            scope: span
+            type: dynamic
+          # Phase 3: Transaction tracing filters
+          - id: tx-hash
+            tag: xrpl.tx.hash
+            operator: "="
+            scope: span
+            type: static
+          - id: tx-origin
+            tag: xrpl.tx.local
+            operator: "="
+            scope: span
+            type: dynamic
+          - id: tx-status
+            tag: xrpl.tx.status
+            operator: "="
+            scope: span
+            type: dynamic
+          # Phase 4: Consensus tracing filters
+          - id: consensus-mode
+            tag: xrpl.consensus.mode
+            operator: "="
+            scope: span
+            type: static
+          - id: consensus-round
+            tag: xrpl.consensus.round
+            operator: "="
+            scope: span
+            type: dynamic
+          - id: consensus-ledger-seq
+            tag: xrpl.consensus.ledger.seq
+            operator: "="
+            scope: span
+            type: static
+          - id: consensus-close-time-correct
+            tag: xrpl.consensus.close_time_correct
+            operator: "="
+            scope: span
+            type: dynamic
+          - id: consensus-state
+            tag: xrpl.consensus.state
+            operator: "="
+            scope: span
+            type: dynamic
+          - id: consensus-close-resolution
+            tag: xrpl.consensus.close_resolution_ms
+            operator: "="
+            scope: span
+            type: dynamic
--- a/docker/telemetry/otel-collector-config.yaml
+++ b/docker/telemetry/otel-collector-config.yaml
@@ -0,0 +1,39 @@
+# OpenTelemetry Collector configuration for rippled development.
+#
+# Pipeline: OTLP receiver -> batch processor -> debug + Jaeger + Tempo.
+# rippled sends traces via OTLP/HTTP to port 4318. The collector batches
+# them and forwards to both Jaeger and Tempo via OTLP/gRPC on the Docker
+# network. Jaeger provides a standalone UI at :16686; Tempo is queryable
+# via Grafana Explore using TraceQL.
+
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+      http:
+        endpoint: 0.0.0.0:4318
+
+processors:
+  batch:
+    timeout: 1s
+    send_batch_size: 100
+
+exporters:
+  debug:
+    verbosity: detailed
+  otlp/jaeger:
+    endpoint: jaeger:4317
+    tls:
+      insecure: true
+  otlp/tempo:
+    endpoint: tempo:4317
+    tls:
+      insecure: true
+
+service:
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [debug, otlp/jaeger, otlp/tempo]
--- a/docker/telemetry/tempo.yaml
+++ b/docker/telemetry/tempo.yaml
@@ -0,0 +1,59 @@
+# Grafana Tempo configuration for rippled telemetry stack.
+#
+# Runs in single-binary mode for local development.
+# Receives traces via OTLP/gRPC from the OTel Collector and stores
+# them locally. Queryable via Grafana Explore using the Tempo datasource.
+#
+# Search filters are configured on the Grafana datasource side
+# (grafana/provisioning/datasources/tempo.yaml). Tempo auto-indexes
+# all span attributes for search in single-binary mode.
+#
+# For production, replace local storage with S3/GCS backend and adjust
+# retention via the compactor settings. See:
+# https://grafana.com/docs/tempo/latest/configuration/
+
+stream_over_http_enabled: true
+
+server:
+  http_listen_port: 3200
+
+distributor:
+  receivers:
+    otlp:
+      protocols:
+        grpc:
+          endpoint: 0.0.0.0:4317
+
+ingester:
+  max_block_duration: 5m
+
+compactor:
+  compaction:
+    block_retention: 1h
+
+# Enable metrics generator for service graph and span metrics (RED: rate,
+# errors, duration per service/span), feeding Grafana's service map visualization.
+# NOTE: remote_write targets prometheus:9090; docker-compose.yml defines no such service.
+metrics_generator:
+  registry:
+    external_labels:
+      source: tempo
+  storage:
+    path: /var/tempo/generator/wal
+    remote_write:
+      - url: http://prometheus:9090/api/v1/write
+
+overrides:
+  defaults:
+    metrics_generator:
+      processors:
+        - service-graphs
+        - span-metrics
+
+storage:
+  trace:
+    backend: local
+    wal:
+      path: /var/tempo/wal
+    local:
+      path: /var/tempo/blocks
--- a/docs/build/telemetry.md
+++ b/docs/build/telemetry.md
@@ -0,0 +1,278 @@
+# OpenTelemetry Tracing for Rippled
+
+This document explains how to build rippled with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces.
+
+- [OpenTelemetry Tracing for Rippled](#opentelemetry-tracing-for-rippled)
+  - [Overview](#overview)
+  - [Building with Telemetry](#building-with-telemetry)
+    - [Summary](#summary)
+    - [Build steps](#build-steps)
+      - [Install dependencies](#install-dependencies)
+      - [Call CMake](#call-cmake)
+      - [Build](#build)
+    - [Building without telemetry](#building-without-telemetry)
+  - [Runtime Configuration](#runtime-configuration)
+    - [Configuration options](#configuration-options)
+  - [Observability Stack](#observability-stack)
+    - [Start the stack](#start-the-stack)
+    - [Verify the stack](#verify-the-stack)
+    - [View traces in Jaeger](#view-traces-in-jaeger)
+  - [Running Tests](#running-tests)
+  - [Troubleshooting](#troubleshooting)
+    - [No traces appear in Jaeger](#no-traces-appear-in-jaeger)
+    - [Conan lockfile error](#conan-lockfile-error)
+    - [CMake target not found](#cmake-target-not-found)
+  - [Architecture](#architecture)
+    - [Key files](#key-files)
+    - [Conditional compilation](#conditional-compilation)
+
+## Overview
+
+Rippled supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing.
+When enabled, it instruments RPC requests, transactions, and consensus rounds with trace
+spans that are exported via OTLP/HTTP to an OpenTelemetry Collector, which forwards them
+to a tracing backend such as Jaeger or Grafana Tempo.
+
+Telemetry is **off by default** at both compile time and runtime:
+
+- **Compile time**: The Conan option `telemetry` and CMake option `telemetry` must be set to `True`/`ON`.
+  When disabled, all tracing macros compile to `((void)0)` with zero overhead.
+- **Runtime**: The `[telemetry]` config section must set `enabled=1`.
+  When disabled at runtime, a no-op implementation is used.
+
+## Building with Telemetry
+
+### Summary
+
+Follow the same instructions as mentioned in [BUILD.md](../../BUILD.md) but with the following changes:
+
+1. Pass `-o telemetry=True` to `conan install` to pull the `opentelemetry-cpp` dependency.
+2. CMake will automatically pick up `telemetry=ON` from the Conan-generated toolchain.
+3. Build as usual.
+
+---
+
+### Build steps
+
+```bash
+cd /path/to/rippled
+rm -rf .build
+mkdir .build
+cd .build
+```
+
+#### Install dependencies
+
+The `telemetry` option adds `opentelemetry-cpp/1.18.0` as a dependency.
+If the Conan lockfile does not yet include this package, bypass it with `--lockfile=""`.
+
+```bash
+conan install .. \
+    --output-folder . \
+    --build missing \
+    --settings build_type=Debug \
+    -o telemetry=True \
+    -o tests=True \
+    -o xrpld=True \
+    --lockfile=""
+```
+
+> **Note**: The first build with telemetry may take longer as `opentelemetry-cpp`
+> and its transitive dependencies are compiled from source.
+
+#### Call CMake
+
+The Conan-generated toolchain file sets `telemetry=ON` automatically.
+No additional CMake flags are needed beyond the standard ones.
+
+```bash
+cmake .. -G Ninja \
+    -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -Dtests=ON -Dxrpld=ON
+```
+
+You should see in the CMake output:
+
+```
+-- OpenTelemetry tracing enabled
+```
+
+#### Build
+
+```bash
+cmake --build . --parallel $(nproc)
+```
+
+### Building without telemetry
+
+Pass `-o telemetry=False` to `conan install` (set it explicitly rather than relying on the option's default).
+The `opentelemetry-cpp` dependency will not be downloaded,
+the `XRPL_ENABLE_TELEMETRY` preprocessor define will not be set,
+and all tracing macros will compile to no-ops.
+The resulting binary is identical to one built before telemetry support was added.
+
+## Runtime Configuration
+
+Add a `[telemetry]` section to your `xrpld.cfg` file:
+
+```ini
+[telemetry]
+enabled=1
+service_name=rippled
+endpoint=http://localhost:4318/v1/traces
+sampling_ratio=1.0
+trace_rpc=1
+trace_transactions=1
+trace_consensus=1
+trace_peer=0
+```
+
+### Configuration options
+
+| Option                | Type   | Default                           | Description                                        |
+| --------------------- | ------ | --------------------------------- | -------------------------------------------------- |
+| `enabled`             | int    | `0`                               | Enable (`1`) or disable (`0`) telemetry at runtime |
+| `service_name`        | string | `rippled`                         | Service name reported in traces                    |
+| `service_instance_id` | string | node public key                   | Unique instance identifier                         |
+| `exporter`            | string | `otlp_http`                       | Exporter type                                      |
+| `endpoint`            | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint                       |
+| `use_tls`             | int    | `0`                               | Enable TLS for the exporter connection             |
+| `tls_ca_cert`         | string | (empty)                           | Path to CA certificate for TLS                     |
+| `sampling_ratio`      | double | `1.0`                             | Fraction of traces to sample (`0.0` to `1.0`)      |
+| `batch_size`          | uint32 | `512`                             | Maximum spans per export batch                     |
+| `batch_delay_ms`      | uint32 | `5000`                            | Maximum delay (ms) before flushing a batch         |
+| `max_queue_size`      | uint32 | `2048`                            | Maximum spans queued in memory                     |
+| `trace_rpc`           | int    | `1`                               | Enable RPC request tracing                         |
+| `trace_transactions`  | int    | `1`                               | Enable transaction lifecycle tracing               |
+| `trace_consensus`     | int    | `1`                               | Enable consensus round tracing                     |
+| `trace_peer`          | int    | `0`                               | Enable peer message tracing (high volume)          |
+| `trace_ledger`        | int    | `1`                               | Enable ledger close tracing                        |
+
+## Observability Stack
+
+A Docker Compose stack is provided in `docker/telemetry/` with four services: the three listed below plus Grafana Tempo (HTTP API on port `3200`), a second trace backend queried through Grafana:
+
+| Service            | Port                                           | Purpose                                                        |
+| ------------------ | ---------------------------------------------- | -------------------------------------------------------------- |
+| **OTel Collector** | `4317` (gRPC), `4318` (HTTP), `13133` (health) | Receives OTLP spans, batches, and forwards to Jaeger and Tempo |
+| **Jaeger**         | `16686` (UI)                                   | Trace storage and visualization                                |
+| **Grafana**        | `3000`                                         | Dashboards (Jaeger and Tempo pre-configured as datasources)    |
+
+### Start the stack
+
+```bash
+docker compose -f docker/telemetry/docker-compose.yml up -d
+```
+
+### Verify the stack
+
+```bash
+# Collector health
+curl http://localhost:13133
+
+# Jaeger UI ("open" is the macOS launcher; use "xdg-open" on Linux)
+open http://localhost:16686
+
+# Grafana
+open http://localhost:3000
+```
+
+### View traces in Jaeger
+
+1. Open `http://localhost:16686` in a browser.
+2. Select the service name (e.g. `rippled`) from the **Service** dropdown.
+3. Click **Find Traces**.
+4. Click into any trace to see the span tree and attributes.
+
+Traced RPC operations produce a span hierarchy like:
+
+```
+rpc.request
+  └── rpc.command.server_info  (xrpl.rpc.command=server_info, xrpl.rpc.status=success)
+```
+
+Each span includes attributes:
+
+- `xrpl.rpc.command` — the RPC method name
+- `xrpl.rpc.version` — API version
+- `xrpl.rpc.role` — `admin` or `user`
+- `xrpl.rpc.status` — `success` or `error`
+
+## Running Tests
+
+Unit tests can be run against a telemetry-enabled build whether or not the
+observability stack is running. When no collector is available, the exporter
+silently drops spans, with no impact on test results.
+
+```bash
+# Run all RPC tests
+./xrpld --unittest=RPCCall,ServerInfo,AccountTx,LedgerRPC,Transaction --unittest-jobs $(nproc)
+
+# Run the full test suite
+./xrpld --unittest --unittest-jobs $(nproc)
+```
+
+To generate traces during manual testing, start rippled in standalone mode:
+
+```bash
+./xrpld --conf /path/to/xrpld.cfg --standalone --start
+```
+
+Then send RPC requests:
+
+```bash
+curl -s -X POST http://127.0.0.1:5005/ \
+    -H "Content-Type: application/json" \
+    -d '{"method":"server_info","params":[{}]}'
+```
+
+## Troubleshooting
+
+### No traces appear in Jaeger
+
+1. Confirm the OTel Collector is running: `docker compose -f docker/telemetry/docker-compose.yml ps`
+2. Check collector logs for errors: `docker compose -f docker/telemetry/docker-compose.yml logs otel-collector`
+3. Confirm `[telemetry] enabled=1` is set in the rippled config.
+4. Confirm `endpoint` points to the correct collector address (`http://localhost:4318/v1/traces`).
+5. Wait for the batch delay to elapse (default `5000` ms) before checking Jaeger.
+
+### Conan lockfile error
+
+If you see `ERROR: Requirement 'opentelemetry-cpp/1.18.0' not in lockfile 'requires'`,
+the lockfile was generated without the telemetry dependency.
+Pass `--lockfile=""` to bypass the lockfile, or regenerate it with telemetry enabled.
+
+### CMake target not found
+
+If CMake reports that `opentelemetry-cpp` targets are not found,
+ensure you ran `conan install` with `-o telemetry=True` and that the
+Conan-generated toolchain file is being used.
+The Conan package provides a single umbrella target
+`opentelemetry-cpp::opentelemetry-cpp` (not individual component targets).
+
+## Architecture
+
+### Key files
+
+| File                                           | Purpose                                                     |
+| ---------------------------------------------- | ----------------------------------------------------------- |
+| `include/xrpl/telemetry/Telemetry.h`           | Abstract telemetry interface and `Setup` struct             |
+| `include/xrpl/telemetry/SpanGuard.h`           | RAII span guard (activates scope, ends span on destruction) |
+| `src/libxrpl/telemetry/Telemetry.cpp`          | OTel-backed implementation (`TelemetryImpl`)                |
+| `src/libxrpl/telemetry/TelemetryConfig.cpp`    | Config parser (`setup_Telemetry()`)                         |
+| `src/libxrpl/telemetry/NullTelemetry.cpp`      | No-op implementation (used when disabled)                   |
+| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.)                 |
+| `src/xrpld/rpc/detail/ServerHandler.cpp`       | RPC entry point instrumentation                             |
+| `src/xrpld/rpc/detail/RPCHandler.cpp`          | Per-command instrumentation                                 |
+| `docker/telemetry/docker-compose.yml`          | Observability stack (Collector + Jaeger + Grafana)          |
+| `docker/telemetry/otel-collector-config.yaml`  | OTel Collector pipeline configuration                       |
+
+### Conditional compilation
+
+All OpenTelemetry SDK headers are guarded behind `#ifdef XRPL_ENABLE_TELEMETRY`.
+The instrumentation macros in `TracingInstrumentation.h` compile to `((void)0)` when
+the define is absent.
+At runtime, if `enabled=0` is set in config (or the section is omitted), a
+`NullTelemetry` implementation is used that returns no-op spans.
+This two-layer approach removes tracing entirely at compile time, or reduces it to no-op calls at runtime.
--- a/include/xrpl/basics/MallocTrim.h
+++ b/include/xrpl/basics/MallocTrim.h
@@ -1,73 +0,0 @@
-#pragma once
-
-#include <xrpl/beast/utility/Journal.h>
-
-#include <chrono>
-#include <cstdint>
-#include <string_view>
-
-namespace xrpl {
-
-// cSpell:ignore ptmalloc
-
-// -----------------------------------------------------------------------------
-// Allocator interaction note:
-// - This facility invokes glibc's malloc_trim(0) on Linux/glibc to request that
-//   ptmalloc return free heap pages to the OS.
-// - If an alternative allocator (e.g. jemalloc or tcmalloc) is linked or
-//   preloaded (LD_PRELOAD), calling glibc's malloc_trim typically has no effect
-//   on the *active* heap. The call is harmless but may not reclaim memory
-//   because those allocators manage their own arenas.
-// - Only glibc sbrk/arena space is eligible for trimming; large mmap-backed
-//   allocations are usually returned to the OS on free regardless of trimming.
-// - Call at known reclamation points (e.g., after cache sweeps / online delete)
-//   and consider rate limiting to avoid churn.
-// -----------------------------------------------------------------------------
-
-struct MallocTrimReport
-{
-    bool supported{false};
-    int trimResult{-1};
-    std::int64_t rssBeforeKB{-1};
-    std::int64_t rssAfterKB{-1};
-    std::chrono::microseconds durationUs{-1};
-    std::int64_t minfltDelta{-1};
-    std::int64_t majfltDelta{-1};
-
-    [[nodiscard]] std::int64_t
-    deltaKB() const noexcept
-    {
-        if (rssBeforeKB < 0 || rssAfterKB < 0)
-            return 0;
-        return rssAfterKB - rssBeforeKB;
-    }
-};
-
-/**
- * @brief Attempt to return freed memory to the operating system.
- *
- * On Linux with glibc malloc, this issues ::malloc_trim(0), which may release
- * free space from ptmalloc arenas back to the kernel. On other platforms, or if
- * a different allocator is in use, this function is a no-op and the report will
- * indicate that trimming is unsupported or had no effect.
- *
- * @param tag     Identifier for logging/debugging purposes.
- * @param journal Journal for diagnostic logging.
- * @return Report containing before/after metrics and the trim result.
- *
- * @note If an alternative allocator (jemalloc/tcmalloc) is linked or preloaded,
- *       calling glibc's malloc_trim may have no effect on the active heap. The
- *       call is harmless but typically does not reclaim memory under those
- *       allocators.
- *
- * @note Only memory served from glibc's sbrk/arena heaps is eligible for trim.
- *       Large allocations satisfied via mmap are usually returned on free
- *       independently of trimming.
- *
- * @note Intended for use after operations that free significant memory (e.g.,
- *       cache sweeps, ledger cleanup, online delete). Consider rate limiting.
- */
-MallocTrimReport
-mallocTrim(std::string_view tag, beast::Journal journal);
-
-}  // namespace xrpl
--- a/include/xrpl/core/ServiceRegistry.h
+++ b/include/xrpl/core/ServiceRegistry.h
@@ -19,6 +19,9 @@ class Manager;
 namespace perf {
 class PerfLog;
 }
+namespace telemetry {
+class Telemetry;
+}

 // This is temporary until we migrate all code to use ServiceRegistry.
 class Application;
@@ -205,6 +208,9 @@ public:
    virtual perf::PerfLog&
    getPerfLog() = 0;

+    virtual telemetry::Telemetry&
+    getTelemetry() = 0;
+
    // Configuration and state
    virtual bool
    isStopping() const = 0;
--- a/include/xrpl/nodestore/Backend.h
+++ b/include/xrpl/nodestore/Backend.h
@@ -77,16 +77,16 @@ public:
        If the object is not found or an error is encountered, the
        result will indicate the condition.
        @note This will be called concurrently.
-        @param hash The hash of the object.
+        @param key A pointer to the key data.
        @param pObject [out] The created object if successful.
        @return The result of the operation.
    */
    virtual Status
-    fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) = 0;
+    fetch(void const* key, std::shared_ptr<NodeObject>* pObject) = 0;

    /** Fetch a batch synchronously. */
    virtual std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
-    fetchBatch(std::vector<uint256> const& hashes) = 0;
+    fetchBatch(std::vector<uint256 const*> const& hashes) = 0;

    /** Store a single object.
        Depending on the implementation this may happen immediately
--- a/include/xrpl/proto/xrpl.proto
+++ b/include/xrpl/proto/xrpl.proto
@@ -85,6 +85,15 @@ message TMPublicKey {
 // If you want to send an amount that is greater than any single address of yours
 // you must first combine coins from one address to another.

+// Trace context for OpenTelemetry distributed tracing across nodes.
+// Fields follow W3C Trace Context, with the IDs carried as raw bytes.
+message TraceContext {
+  optional bytes trace_id = 1;      // 16-byte trace identifier
+  optional bytes span_id = 2;       // 8-byte parent span identifier
+  optional uint32 trace_flags = 3;  // bit 0 = sampled; absent = not sampled
+  optional string trace_state = 4;  // W3C tracestate header value
+}
+
 enum TransactionStatus {
  tsNEW = 1;        // origin node did/could not validate
  tsCURRENT = 2;    // scheduled to go in this ledger
@@ -101,6 +110,9 @@ message TMTransaction {
  required TransactionStatus status = 2;
  optional uint64 receiveTimestamp = 3;
  optional bool deferred = 4;  // not applied to open ledger
+
+  // Optional trace context for OpenTelemetry distributed tracing
+  optional TraceContext trace_context = 1001;
 }

 message TMTransactions {
@@ -149,6 +161,9 @@ message TMProposeSet {

  // Number of hops traveled
  optional uint32 hops = 12 [deprecated = true];
+
+  // Optional trace context for OpenTelemetry distributed tracing
+  optional TraceContext trace_context = 1001;
 }

 enum TxSetStatus {
@@ -194,6 +209,9 @@ message TMValidation {

  // Number of hops traveled
  optional uint32 hops = 3 [deprecated = true];
+
+  // Optional trace context for OpenTelemetry distributed tracing
+  optional TraceContext trace_context = 1001;
 }

 // An array of Endpoint messages
--- a/include/xrpl/protocol/detail/features.macro
+++ b/include/xrpl/protocol/detail/features.macro
@@ -15,10 +15,9 @@

 // Add new amendments to the top of this list.
 // Keep it sorted in reverse chronological order.
-
 XRPL_FIX   (PermissionedDomainInvariant, Supported::yes, VoteBehavior::DefaultNo)
 XRPL_FIX    (ExpiredNFTokenOfferRemoval, Supported::yes, VoteBehavior::DefaultNo)
-XRPL_FIX    (BatchInnerSigs,             Supported::no, VoteBehavior::DefaultNo)
+XRPL_FIX    (BatchInnerSigs,             Supported::yes, VoteBehavior::DefaultNo)
 XRPL_FEATURE(LendingProtocol,            Supported::yes, VoteBehavior::DefaultNo)
 XRPL_FEATURE(PermissionDelegationV1_1,   Supported::no,  VoteBehavior::DefaultNo)
 XRPL_FIX    (DirectoryLimit,             Supported::yes, VoteBehavior::DefaultNo)
@@ -32,7 +31,7 @@ XRPL_FEATURE(TokenEscrow,                Supported::yes, VoteBehavior::DefaultNo
 XRPL_FIX    (EnforceNFTokenTrustlineV2,  Supported::yes, VoteBehavior::DefaultNo)
 XRPL_FIX    (AMMv1_3,                    Supported::yes, VoteBehavior::DefaultNo)
 XRPL_FEATURE(PermissionedDEX,            Supported::yes, VoteBehavior::DefaultNo)
-XRPL_FEATURE(Batch,                      Supported::no,  VoteBehavior::DefaultNo)
+XRPL_FEATURE(Batch,                      Supported::yes, VoteBehavior::DefaultNo)
 XRPL_FEATURE(SingleAssetVault,           Supported::yes,  VoteBehavior::DefaultNo)
 XRPL_FIX    (PayChanCancelAfter,         Supported::yes, VoteBehavior::DefaultNo)
 // Check flags in Credential transactions
--- a/include/xrpl/telemetry/SpanGuard.h
+++ b/include/xrpl/telemetry/SpanGuard.h
@@ -0,0 +1,179 @@
+#pragma once
+
+/** RAII guard for OpenTelemetry trace spans.
+
+    Wraps an OTel Span and Scope together. On construction, the span is
+    activated on the current thread's context (via Scope). On destruction,
+    the span is ended and the previous context is restored.
+
+    Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also
+    be stored in std::optional for conditional tracing (move-constructible).
+
+    Only compiled when XRPL_ENABLE_TELEMETRY is defined.
+*/
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include <opentelemetry/common/attribute_value.h>
+#include <opentelemetry/context/runtime_context.h>
+#include <opentelemetry/nostd/shared_ptr.h>
+#include <opentelemetry/trace/scope.h>
+#include <opentelemetry/trace/span.h>
+
+#include <exception>
+#include <initializer_list>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+namespace xrpl {
+namespace telemetry {
+
+/** RAII wrapper that activates a span on construction and ends it on
+    destruction. Non-copyable but move-constructible so it can be held
+    in std::optional for conditional tracing.
+*/
+class SpanGuard
+{
+    /** The OTel span being guarded. Set to nullptr after move. */
+    opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span_;
+
+    /** Scope that keeps span_ active on this thread; heap-held so a move can hand it over. */
+    std::unique_ptr<opentelemetry::trace::Scope> scope_;
+
+public:
+    /** Construct a guard that activates @p span on the current context.
+
+        @param span  The span to guard. Ended in the destructor.
+    */
+    explicit SpanGuard(opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span)
+        : span_(std::move(span)), scope_(std::make_unique<opentelemetry::trace::Scope>(span_))
+    {
+    }
+
+    /** Non-copyable. Move-constructible to support std::optional.
+        A move hands over the existing Scope: attaching a second one would be
+        undone as soon as the moved-from guard detached its own.
+    */
+    SpanGuard(SpanGuard const&) = delete;
+    SpanGuard&
+    operator=(SpanGuard const&) = delete;
+    SpanGuard(SpanGuard&& other) noexcept
+        : span_(std::move(other.span_)), scope_(std::move(other.scope_))
+    {
+        other.span_ = nullptr;
+    }
+    SpanGuard&
+    operator=(SpanGuard&&) = delete;
+
+    ~SpanGuard()
+    {
+        if (span_)
+            span_->End();
+    }
+
+    /** @return A mutable reference to the underlying span. */
+    opentelemetry::trace::Span&
+    span()
+    {
+        return *span_;
+    }
+
+    /** @return A const reference to the underlying span. */
+    opentelemetry::trace::Span const&
+    span() const
+    {
+        return *span_;
+    }
+
+    /** Mark the span status as OK. */
+    void
+    setOk()
+    {
+        span_->SetStatus(opentelemetry::trace::StatusCode::kOk);
+    }
+
+    /** Set an explicit status code on the span.
+
+        @param code         The OTel status code.
+        @param description  Optional human-readable status description.
+    */
+    void
+    setStatus(opentelemetry::trace::StatusCode code, std::string_view description = "")
+    {
+        span_->SetStatus(code, std::string(description));
+    }
+
+    /** Set a key-value attribute on the span.
+
+        @param key    Attribute name (e.g. "xrpl.rpc.command").
+        @param value  Attribute value (string, int, bool, etc.).
+    */
+    template <typename T>
+    void
+    setAttribute(std::string_view key, T&& value)
+    {
+        span_->SetAttribute(
+            opentelemetry::nostd::string_view(key.data(), key.size()), std::forward<T>(value));
+    }
+
+    /** Add a named event to the span's timeline.
+
+        @param name  Event name.
+    */
+    void
+    addEvent(std::string_view name)
+    {
+        span_->AddEvent(std::string(name));
+    }
+
+    /** Add a named event with key-value attributes to the span.
+
+        Allows attaching structured metadata to a point-in-time event on
+        the span timeline (e.g., "dispute.resolve" with transaction ID
+        and vote result attributes).
+
+        @param name        Event name (e.g., "dispute.resolve").
+        @param attributes  Key-value pairs describing the event.
+    */
+    void
+    addEvent(
+        std::string_view name,
+        std::initializer_list<
+            std::pair<opentelemetry::nostd::string_view, opentelemetry::common::AttributeValue>>
+            attributes)
+    {
+        span_->AddEvent(std::string(name), attributes);
+    }
+
+    /** Record an exception as a span event following OTel semantic
+        conventions, and mark the span status as error.
+
+        @param e  The exception to record.
+    */
+    void
+    recordException(std::exception const& e)
+    {
+        span_->AddEvent(
+            "exception",
+            {{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}});
+        span_->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
+    }
+
+    /** Return the calling thread's current OTel context.
+
+        Useful for creating child spans on a different thread by passing
+        this context to Telemetry::startSpan(name, parentContext).
+    */
+    opentelemetry::context::Context
+    context() const
+    {
+        return opentelemetry::context::RuntimeContext::GetCurrent();
+    }
+};
+
+}  // namespace telemetry
+}  // namespace xrpl
+
+#endif  // XRPL_ENABLE_TELEMETRY
--- a/include/xrpl/telemetry/Telemetry.h
+++ b/include/xrpl/telemetry/Telemetry.h
@@ -0,0 +1,282 @@
+#pragma once
+
+/** Abstract interface for OpenTelemetry distributed tracing.
+
+    Provides the Telemetry base class that all components use to create trace
+    spans. Two implementations exist:
+
+      - TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled
+        only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime.
+      - NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is
+        disabled at compile time or runtime.
+
+    The Setup struct holds all configuration parsed from the [telemetry]
+    section of xrpld.cfg. See TelemetryConfig.cpp for the parser and
+    cfg/xrpld-example.cfg for the available options.
+
+    OTel SDK headers are conditionally included behind XRPL_ENABLE_TELEMETRY
+    so that builds without telemetry have zero dependency on opentelemetry-cpp.
+*/
+
+#include <xrpl/basics/BasicConfig.h>
+#include <xrpl/beast/utility/Journal.h>
+
+#include <chrono>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#ifdef XRPL_ENABLE_TELEMETRY
+#include <opentelemetry/common/attribute_value.h>
+#include <opentelemetry/context/context.h>
+#include <opentelemetry/nostd/shared_ptr.h>
+#include <opentelemetry/trace/span.h>
+#include <opentelemetry/trace/span_context.h>
+#include <opentelemetry/trace/tracer.h>
+
+#include <utility>
+#include <vector>
+#endif
+
+namespace xrpl {
+namespace telemetry {
+
+/** Interface through which components create trace spans.
+
+    Every operation is pure virtual except setServiceInstanceId(), which
+    defaults to a no-op. The tracer and span-creation members are declared
+    only when XRPL_ENABLE_TELEMETRY is defined.
+*/
+class Telemetry
+{
+public:
+    /** Configuration parsed from the [telemetry] section of xrpld.cfg.
+
+        All fields have sensible defaults so the section can be minimal
+        or omitted entirely. See TelemetryConfig.cpp for the parser.
+    */
+    struct Setup
+    {
+        /** Master switch: true to enable tracing at runtime. */
+        bool enabled = false;
+
+        /** OTel resource attribute `service.name`. */
+        std::string serviceName = "rippled";
+
+        /** OTel resource attribute `service.version` (set from BuildInfo). */
+        std::string serviceVersion;
+
+        /** OTel resource attribute `service.instance.id` (defaults to node
+            public key). */
+        std::string serviceInstanceId;
+
+        /** Exporter type: currently only "otlp_http" is supported. */
+        std::string exporterType = "otlp_http";
+
+        /** OTLP/HTTP endpoint URL where spans are sent. */
+        std::string exporterEndpoint = "http://localhost:4318/v1/traces";
+
+        /** Whether to use TLS for the exporter connection. */
+        bool useTls = false;
+
+        /** Path to a CA certificate bundle for TLS verification. */
+        std::string tlsCertPath;
+
+        /** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything. */
+        double samplingRatio = 1.0;
+
+        /** Maximum number of spans per batch export. */
+        std::uint32_t batchSize = 512;
+
+        /** Delay between batch exports. */
+        std::chrono::milliseconds batchDelay{5000};
+
+        /** Maximum number of spans queued before dropping. */
+        std::uint32_t maxQueueSize = 2048;
+
+        /** Network identifier, added as an OTel resource attribute. */
+        std::uint32_t networkId = 0;
+
+        /** Network type label (e.g. "mainnet", "testnet", "devnet"). */
+        std::string networkType = "mainnet";
+
+        /** Enable tracing for transaction processing. */
+        bool traceTransactions = true;
+
+        /** Enable tracing for consensus rounds. */
+        bool traceConsensus = true;
+
+        /** Enable tracing for RPC request handling. */
+        bool traceRpc = true;
+
+        /** Enable tracing for peer-to-peer messages (disabled by default
+            due to high volume). */
+        bool tracePeer = false;
+
+        /** Enable tracing for ledger close/accept. */
+        bool traceLedger = true;
+
+        /** Cross-node correlation strategy for consensus tracing.
+
+            "deterministic" derives trace_id from previousLedger.id() so all
+            nodes participating in the same consensus round share the same
+            trace_id, enabling cross-node trace correlation in the backend.
+
+            "attribute" uses normal random trace_id with the ledger_id stored
+            as a span attribute; correlation must be done via attribute queries.
+        */
+        std::string consensusTraceStrategy = "deterministic";
+    };
+
+    virtual ~Telemetry() = default;
+
+    /** Update the service instance ID (OTel resource attribute
+        `service.instance.id`).
+
+        Must be called before start(). The node public key is not available
+        when Telemetry is constructed (during the ApplicationImp member
+        initializer list), so this setter allows Application::setup() to
+        inject the identity once nodeIdentity_ is known.
+
+        @param id  The node's base58-encoded public key or custom identifier.
+    */
+    virtual void
+    setServiceInstanceId(std::string const& id)
+    {
+        // Default no-op; implementations that need the ID override this.
+        (void)id;
+    }
+
+    /** Initialize the tracing pipeline (exporter, processor, provider).
+        Call after construction and after any setServiceInstanceId() call.
+    */
+    virtual void
+    start() = 0;
+
+    /** Flush pending spans and shut down the tracing pipeline.
+        Call before destruction.
+    */
+    virtual void
+    stop() = 0;
+
+    /** @return true if this instance is actively exporting spans. */
+    virtual bool
+    isEnabled() const = 0;
+
+    /** @return true if transaction processing should be traced. */
+    virtual bool
+    shouldTraceTransactions() const = 0;
+
+    /** @return true if consensus rounds should be traced. */
+    virtual bool
+    shouldTraceConsensus() const = 0;
+
+    /** @return true if RPC request handling should be traced. */
+    virtual bool
+    shouldTraceRpc() const = 0;
+
+    /** @return true if peer-to-peer messages should be traced. */
+    virtual bool
+    shouldTracePeer() const = 0;
+
+    /** @return true if ledger close/accept should be traced. */
+    virtual bool
+    shouldTraceLedger() const = 0;
+
+    /** @return The consensus trace correlation strategy. The documented
+        values are described on Setup::consensusTraceStrategy.
+    */
+    virtual std::string const&
+    getConsensusTraceStrategy() const = 0;
+
+#ifdef XRPL_ENABLE_TELEMETRY
+    /** Get or create a named tracer instance.
+
+        @param name  Tracer name used to identify the instrumentation library.
+        @return A shared pointer to the Tracer.
+    */
+    virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
+    getTracer(std::string_view name = "rippled") = 0;
+
+    /** Start a new span on the current thread's context.
+
+        The span becomes a child of the current active span (if any) via
+        OpenTelemetry's context propagation.
+
+        @param name  Span name (e.g. "rpc.request" or "rpc.command.<cmd>").
+        @param kind  The span kind (defaults to kInternal).
+        @return A shared pointer to the new Span.
+    */
+    virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
+    startSpan(
+        std::string_view name,
+        opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
+
+    /** Start a new span with an explicit parent context.
+
+        Use this overload when the parent span is not on the current
+        thread's context stack (e.g. cross-thread trace propagation).
+
+        @param name           Span name.
+        @param parentContext  The parent span's context.
+        @param kind           The span kind (defaults to kInternal).
+        @return A shared pointer to the new Span.
+    */
+    virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
+    startSpan(
+        std::string_view name,
+        opentelemetry::context::Context const& parentContext,
+        opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
+
+    /** Start a new span with an explicit parent context and span links.
+
+        Span links establish follows-from relationships without implying
+        a parent-child hierarchy. Common uses include linking consensus
+        round N+1 to round N, or linking a validation span back to the
+        round that produced it.
+
+        @param name           Span name.
+        @param parentContext  The parent span's context.
+        @param links          Vector of (SpanContext, attributes) pairs
+                              for follows-from relationships.
+        @param kind           The span kind (defaults to kInternal).
+        @return A shared pointer to the new Span.
+    */
+    virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
+    startSpan(
+        std::string_view name,
+        opentelemetry::context::Context const& parentContext,
+        std::vector<std::pair<
+            opentelemetry::trace::SpanContext,
+            std::vector<std::pair<std::string, opentelemetry::common::AttributeValue>>>> const&
+            links,
+        opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
+#endif
+};
+
+/** Create a Telemetry instance.
+
+    @param setup    Configuration from the [telemetry] config section.
+    @param journal  Journal for log output during initialization.
+    @return A TelemetryImpl when setup.enabled is true, or a NullTelemetry
+            no-op stub otherwise. TelemetryImpl is compiled only when
+            XRPL_ENABLE_TELEMETRY is defined.
+*/
+std::unique_ptr<Telemetry>
+make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal);
+
+/** Parse the [telemetry] config section into a Setup struct.
+
+    @param section        The [telemetry] config section.
+    @param nodePublicKey  Node public key, used as default instance ID.
+    @param version        Build version string.
+    @return A populated Setup struct with defaults for missing values.
+*/
+Telemetry::Setup
+setup_Telemetry(
+    Section const& section,
+    std::string const& nodePublicKey,
+    std::string const& version);
+
+}  // namespace telemetry
+}  // namespace xrpl
--- a/include/xrpl/telemetry/TraceContextPropagator.h
+++ b/include/xrpl/telemetry/TraceContextPropagator.h
@@ -0,0 +1,94 @@
+#pragma once
+
+/** Utilities for trace context propagation across nodes.
+
+    Provides serialization/deserialization of OTel trace context to/from
+    Protocol Buffer TraceContext messages (P2P cross-node propagation).
+
+    Only compiled when XRPL_ENABLE_TELEMETRY is defined.
+*/
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include <xrpl/proto/xrpl.pb.h>
+
+#include <opentelemetry/context/context.h>
+#include <opentelemetry/trace/context.h>
+#include <opentelemetry/trace/default_span.h>
+#include <opentelemetry/trace/span_context.h>
+#include <opentelemetry/trace/trace_flags.h>
+#include <opentelemetry/trace/trace_id.h>
+
+#include <cstdint>
+
+namespace xrpl {
+namespace telemetry {
+
+/** Extract OTel context from a protobuf TraceContext message.
+
+    @param proto  The protobuf TraceContext received from a peer (untrusted).
+    @return An OTel Context carrying the remote parent span, or an empty
+            context if an ID is missing, mis-sized, or all zeros.
+*/
+inline opentelemetry::context::Context
+extractFromProtobuf(protocol::TraceContext const& proto)
+{
+    namespace trace = opentelemetry::trace;
+
+    if (!proto.has_trace_id() || proto.trace_id().size() != 16 || !proto.has_span_id() ||
+        proto.span_id().size() != 8)
+    {
+        return opentelemetry::context::Context{};
+    }
+
+    auto const* rawTraceId = reinterpret_cast<std::uint8_t const*>(proto.trace_id().data());
+    auto const* rawSpanId = reinterpret_cast<std::uint8_t const*>(proto.span_id().data());
+    trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(rawTraceId, 16));
+    trace::SpanId spanId(opentelemetry::nostd::span<std::uint8_t const, 8>(rawSpanId, 8));
+    // Absent trace_flags means not-sampled (0x00), per W3C Trace Context.
+    auto const rawFlags = proto.has_trace_flags() ? proto.trace_flags() : 0u;
+    trace::TraceFlags flags(static_cast<std::uint8_t>(rawFlags));
+
+    trace::SpanContext spanCtx(traceId, spanId, flags, /* remote = */ true);
+    if (!spanCtx.IsValid())  // all-zero IDs: well-sized but not a usable parent
+        return opentelemetry::context::Context{};
+
+    return opentelemetry::context::Context{}.SetValue(
+        trace::kSpanKey,
+        opentelemetry::nostd::shared_ptr<trace::Span>(new trace::DefaultSpan(spanCtx)));
+}
+
+/** Copy the trace identity of the span held in @p ctx into @p proto.
+
+    Writes trace_id, span_id and trace_flags. @p proto is left untouched
+    when @p ctx holds no span or the span's context is not valid.
+
+    @param ctx    The OTel context containing the span to propagate.
+    @param proto  The protobuf TraceContext to populate.
+*/
+inline void
+injectToProtobuf(opentelemetry::context::Context const& ctx, protocol::TraceContext& proto)
+{
+    namespace trace = opentelemetry::trace;
+
+    auto const activeSpan = trace::GetSpan(ctx);
+    if (!activeSpan)
+        return;
+
+    auto const& sc = activeSpan->GetContext();
+    if (!sc.IsValid())
+        return;
+
+    // Raw-byte identifiers: 16 bytes for the trace, 8 for the span.
+    proto.set_trace_id(sc.trace_id().Id().data(), trace::TraceId::kSize);
+    proto.set_span_id(sc.span_id().Id().data(), trace::SpanId::kSize);
+
+    // Flags byte (widened to the uint32 field). The trace_state field of
+    // the message is not written by this function.
+    proto.set_trace_flags(sc.trace_flags().flags());
+}
+
+}  // namespace telemetry
+}  // namespace xrpl
+
+#endif  // XRPL_ENABLE_TELEMETRY
--- a/src/libxrpl/basics/MallocTrim.cpp
+++ b/src/libxrpl/basics/MallocTrim.cpp
@@ -1,157 +0,0 @@
-#include <xrpl/basics/Log.h>
-#include <xrpl/basics/MallocTrim.h>
-
-#include <boost/predef.h>
-
-#include <chrono>
-#include <cstdint>
-#include <cstdio>
-#include <fstream>
-#include <sstream>
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-#include <sys/resource.h>
-
-#include <malloc.h>
-#include <unistd.h>
-
-// Require RUSAGE_THREAD for thread-scoped page fault tracking
-#ifndef RUSAGE_THREAD
-#error "MallocTrim rusage instrumentation requires RUSAGE_THREAD on Linux/glibc"
-#endif
-
-namespace {
-
-bool
-getRusageThread(struct rusage& ru)
-{
-    return ::getrusage(RUSAGE_THREAD, &ru) == 0;  // LCOV_EXCL_LINE
-}
-
-}  // namespace
-#endif
-
-namespace xrpl {
-
-namespace detail {
-
-// cSpell:ignore statm
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-
-inline int
-mallocTrimWithPad(std::size_t padBytes)
-{
-    return ::malloc_trim(padBytes);
-}
-
-long
-parseStatmRSSkB(std::string const& statm)
-{
-    // /proc/self/statm format: size resident shared text lib data dt
-    // We want the second field (resident) which is in pages
-    std::istringstream iss(statm);
-    long size = 0, resident = 0;
-    if (!(iss >> size >> resident))
-        return -1;
-
-    // Convert pages to KB
-    long const pageSize = ::sysconf(_SC_PAGESIZE);
-    if (pageSize <= 0)
-        return -1;
-
-    return (resident * pageSize) / 1024;
-}
-
-#endif  // __GLIBC__ && BOOST_OS_LINUX
-
-}  // namespace detail
-
-MallocTrimReport
-mallocTrim(std::string_view tag, beast::Journal journal)
-{
-    // LCOV_EXCL_START
-
-    MallocTrimReport report;
-
-#if !(defined(__GLIBC__) && BOOST_OS_LINUX)
-    JLOG(journal.debug()) << "malloc_trim not supported on this platform (tag=" << tag << ")";
-#else
-    // Keep glibc malloc_trim padding at 0 (default): 12h Mainnet tests across 0/256KB/1MB/16MB
-    // showed no clear, consistent benefit from custom padding—0 provided the best overall balance
-    // of RSS reduction and trim-latency stability without adding a tuning surface.
-    constexpr std::size_t TRIM_PAD = 0;
-
-    report.supported = true;
-
-    if (journal.debug())
-    {
-        auto readFile = [](std::string const& path) -> std::string {
-            std::ifstream ifs(path, std::ios::in | std::ios::binary);
-            if (!ifs.is_open())
-                return {};
-
-            // /proc files are often not seekable; read as a stream.
-            std::ostringstream oss;
-            oss << ifs.rdbuf();
-            return oss.str();
-        };
-
-        std::string const tagStr{tag};
-        std::string const statmPath = "/proc/self/statm";
-
-        auto const statmBefore = readFile(statmPath);
-        long const rssBeforeKB = detail::parseStatmRSSkB(statmBefore);
-
-        struct rusage ru0{};
-        bool const have_ru0 = getRusageThread(ru0);
-
-        auto const t0 = std::chrono::steady_clock::now();
-
-        report.trimResult = detail::mallocTrimWithPad(TRIM_PAD);
-
-        auto const t1 = std::chrono::steady_clock::now();
-
-        struct rusage ru1{};
-        bool const have_ru1 = getRusageThread(ru1);
-
-        auto const statmAfter = readFile(statmPath);
-        long const rssAfterKB = detail::parseStatmRSSkB(statmAfter);
-
-        // Populate report fields
-        report.rssBeforeKB = rssBeforeKB;
-        report.rssAfterKB = rssAfterKB;
-        report.durationUs = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0);
-
-        if (have_ru0 && have_ru1)
-        {
-            report.minfltDelta = ru1.ru_minflt - ru0.ru_minflt;
-            report.majfltDelta = ru1.ru_majflt - ru0.ru_majflt;
-        }
-
-        std::int64_t const deltaKB = (rssBeforeKB < 0 || rssAfterKB < 0)
-            ? 0
-            : (static_cast<std::int64_t>(rssAfterKB) - static_cast<std::int64_t>(rssBeforeKB));
-
-        JLOG(journal.debug()) << "malloc_trim tag=" << tagStr << " result=" << report.trimResult
-                              << " pad=" << TRIM_PAD << " bytes"
-                              << " rss_before=" << rssBeforeKB << "kB"
-                              << " rss_after=" << rssAfterKB << "kB"
-                              << " delta=" << deltaKB << "kB"
-                              << " duration_us=" << report.durationUs.count()
-                              << " minflt_delta=" << report.minfltDelta
-                              << " majflt_delta=" << report.majfltDelta;
-    }
-    else
-    {
-        report.trimResult = detail::mallocTrimWithPad(TRIM_PAD);
-    }
-
-#endif
-
-    return report;
-
-    // LCOV_EXCL_STOP
-}
-
-}  // namespace xrpl
--- a/src/libxrpl/nodestore/DatabaseNodeImp.cpp
+++ b/src/libxrpl/nodestore/DatabaseNodeImp.cpp
@@ -33,7 +33,7 @@ DatabaseNodeImp::fetchNodeObject(

    try
    {
-        status = backend_->fetch(hash, &nodeObject);
+        status = backend_->fetch(hash.data(), &nodeObject);
    }
    catch (std::exception const& e)
    {
@@ -68,10 +68,18 @@ DatabaseNodeImp::fetchBatch(std::vector<uint256> const& hashes)
    using namespace std::chrono;
    auto const before = steady_clock::now();

+    std::vector<uint256 const*> batch{};
+    batch.reserve(hashes.size());
+    for (size_t i = 0; i < hashes.size(); ++i)
+    {
+        auto const& hash = hashes[i];
+        batch.push_back(&hash);
+    }
+
    // Get the node objects that match the hashes from the backend. To protect
    // against the backends returning fewer or more results than expected, the
    // container is resized to the number of hashes.
-    auto results = backend_->fetchBatch(hashes).first;
+    auto results = backend_->fetchBatch(batch).first;
    XRPL_ASSERT(
        results.size() == hashes.size() || results.empty(),
        "number of output objects either matches number of input hashes or is empty");
--- a/src/libxrpl/nodestore/DatabaseRotatingImp.cpp
+++ b/src/libxrpl/nodestore/DatabaseRotatingImp.cpp
@@ -105,7 +105,7 @@ DatabaseRotatingImp::fetchNodeObject(
        std::shared_ptr<NodeObject> nodeObject;
        try
        {
-            status = backend->fetch(hash, &nodeObject);
+            status = backend->fetch(hash.data(), &nodeObject);
        }
        catch (std::exception const& e)
        {
--- a/src/libxrpl/nodestore/backend/MemoryFactory.cpp
+++ b/src/libxrpl/nodestore/backend/MemoryFactory.cpp
@@ -116,9 +116,10 @@ public:
    //--------------------------------------------------------------------------

    Status
-    fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) override
+    fetch(void const* key, std::shared_ptr<NodeObject>* pObject) override
    {
        XRPL_ASSERT(db_, "xrpl::NodeStore::MemoryBackend::fetch : non-null database");
+        uint256 const hash(uint256::fromVoid(key));

        std::lock_guard _(db_->mutex);

@@ -133,14 +134,14 @@ public:
    }

    std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
-    fetchBatch(std::vector<uint256> const& hashes) override
+    fetchBatch(std::vector<uint256 const*> const& hashes) override
    {
        std::vector<std::shared_ptr<NodeObject>> results;
        results.reserve(hashes.size());
        for (auto const& h : hashes)
        {
            std::shared_ptr<NodeObject> nObj;
-            Status status = fetch(h, &nObj);
+            Status status = fetch(h->begin(), &nObj);
            if (status != ok)
            {
                results.push_back({});
--- a/src/libxrpl/nodestore/backend/NuDBFactory.cpp
+++ b/src/libxrpl/nodestore/backend/NuDBFactory.cpp
@@ -179,17 +179,17 @@ public:
    }

    Status
-    fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pno) override
+    fetch(void const* key, std::shared_ptr<NodeObject>* pno) override
    {
        Status status = ok;
        pno->reset();
        nudb::error_code ec;
        db_.fetch(
-            hash.data(),
-            [&hash, pno, &status](void const* data, std::size_t size) {
+            key,
+            [key, pno, &status](void const* data, std::size_t size) {
                nudb::detail::buffer bf;
                auto const result = nodeobject_decompress(data, size, bf);
-                DecodedBlob decoded(hash.data(), result.first, result.second);
+                DecodedBlob decoded(key, result.first, result.second);
                if (!decoded.wasOk())
                {
                    status = dataCorrupt;
@@ -207,14 +207,14 @@ public:
    }

    std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
-    fetchBatch(std::vector<uint256> const& hashes) override
+    fetchBatch(std::vector<uint256 const*> const& hashes) override
    {
        std::vector<std::shared_ptr<NodeObject>> results;
        results.reserve(hashes.size());
        for (auto const& h : hashes)
        {
            std::shared_ptr<NodeObject> nObj;
-            Status status = fetch(h, &nObj);
+            Status status = fetch(h->begin(), &nObj);
            if (status != ok)
            {
                results.push_back({});
--- a/src/libxrpl/nodestore/backend/NullFactory.cpp
+++ b/src/libxrpl/nodestore/backend/NullFactory.cpp
@@ -36,13 +36,13 @@ public:
    }

    Status
-    fetch(uint256 const&, std::shared_ptr<NodeObject>*) override
+    fetch(void const*, std::shared_ptr<NodeObject>*) override
    {
        return notFound;
    }

    std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
-    fetchBatch(std::vector<uint256> const& hashes) override
+    fetchBatch(std::vector<uint256 const*> const& hashes) override
    {
        return {};
    }
--- a/src/libxrpl/nodestore/backend/RocksDBFactory.cpp
+++ b/src/libxrpl/nodestore/backend/RocksDBFactory.cpp
@@ -250,7 +250,7 @@ public:
    //--------------------------------------------------------------------------

    Status
-    fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) override
+    fetch(void const* key, std::shared_ptr<NodeObject>* pObject) override
    {
        XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::fetch : non-null database");
        pObject->reset();
@@ -258,7 +258,7 @@ public:
        Status status(ok);

        rocksdb::ReadOptions const options;
-        rocksdb::Slice const slice(std::bit_cast<char const*>(hash.data()), m_keyBytes);
+        rocksdb::Slice const slice(static_cast<char const*>(key), m_keyBytes);

        std::string string;

@@ -266,7 +266,7 @@ public:

        if (getStatus.ok())
        {
-            DecodedBlob decoded(hash.data(), string.data(), string.size());
+            DecodedBlob decoded(key, string.data(), string.size());

            if (decoded.wasOk())
            {
@@ -301,14 +301,14 @@ public:
    }

    std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
-    fetchBatch(std::vector<uint256> const& hashes) override
+    fetchBatch(std::vector<uint256 const*> const& hashes) override
    {
        std::vector<std::shared_ptr<NodeObject>> results;
        results.reserve(hashes.size());
        for (auto const& h : hashes)
        {
            std::shared_ptr<NodeObject> nObj;
-            Status status = fetch(h, &nObj);
+            Status status = fetch(h->begin(), &nObj);
            if (status != ok)
            {
                results.push_back({});
@@ -342,8 +342,9 @@ public:
            EncodedBlob encoded(e);

            wb.Put(
-                rocksdb::Slice(std::bit_cast<char const*>(encoded.getKey()), m_keyBytes),
-                rocksdb::Slice(std::bit_cast<char const*>(encoded.getData()), encoded.getSize()));
+                rocksdb::Slice(reinterpret_cast<char const*>(encoded.getKey()), m_keyBytes),
+                rocksdb::Slice(
+                    reinterpret_cast<char const*>(encoded.getData()), encoded.getSize()));
        }

        rocksdb::WriteOptions const options;
--- a/src/libxrpl/telemetry/NullTelemetry.cpp
+++ b/src/libxrpl/telemetry/NullTelemetry.cpp
@@ -0,0 +1,149 @@
+/** No-op implementation of the Telemetry interface.
+
+    Always compiled (regardless of XRPL_ENABLE_TELEMETRY). Provides the
+    make_Telemetry() factory when telemetry is compiled out (#ifndef), which
+    unconditionally returns a NullTelemetry that does nothing.
+
+    When XRPL_ENABLE_TELEMETRY IS defined, the OTel virtual methods
+    (getTracer, startSpan) return noop tracers/spans. The make_Telemetry()
+    factory in this file is not used in that case -- Telemetry.cpp provides
+    its own factory that can return the real TelemetryImpl.
+*/
+
+#include <xrpl/telemetry/Telemetry.h>
+
+#ifdef XRPL_ENABLE_TELEMETRY
+#include <opentelemetry/common/attribute_value.h>
+#include <opentelemetry/trace/noop.h>
+#include <opentelemetry/trace/span_context.h>
+#endif
+
+namespace xrpl {
+namespace telemetry {
+
+namespace {
+
+/** Telemetry implementation in which every operation is a no-op.
+
+    make_Telemetry() below returns it when XRPL_ENABLE_TELEMETRY is not
+    defined; isEnabled() and all shouldTrace*() queries report false.
+*/
+class NullTelemetry : public Telemetry
+{
+    /** Copy of the configuration; only consensusTraceStrategy is read back. */
+    Setup const setup_;
+
+public:
+    explicit NullTelemetry(Setup const& setup) : setup_(setup)
+    {
+    }
+
+    void
+    start() override
+    {
+    }
+
+    void
+    stop() override
+    {
+    }
+
+    bool
+    isEnabled() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceTransactions() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceConsensus() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceRpc() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTracePeer() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceLedger() const override
+    {
+        return false;
+    }
+
+    std::string const&
+    getConsensusTraceStrategy() const override
+    {
+        return setup_.consensusTraceStrategy;
+    }
+
+#ifdef XRPL_ENABLE_TELEMETRY
+    // Shorthand for the OTel handle types used by the overrides below.
+    using SpanPtr = opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>;
+    using TracerPtr = opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>;
+
+    TracerPtr
+    getTracer(std::string_view) override
+    {
+        static TracerPtr const sharedNoop(new opentelemetry::trace::NoopTracer());
+        return sharedNoop;
+    }
+
+    SpanPtr
+    startSpan(std::string_view, opentelemetry::trace::SpanKind) override
+    {
+        return SpanPtr(new opentelemetry::trace::NoopSpan(nullptr));
+    }
+
+    SpanPtr
+    startSpan(
+        std::string_view,
+        opentelemetry::context::Context const&,
+        opentelemetry::trace::SpanKind) override
+    {
+        return SpanPtr(new opentelemetry::trace::NoopSpan(nullptr));
+    }
+
+    /** Links are ignored; a fresh NoopSpan is returned as in the other overloads. */
+    SpanPtr
+    startSpan(
+        std::string_view,
+        opentelemetry::context::Context const&,
+        std::vector<std::pair<
+            opentelemetry::trace::SpanContext,
+            std::vector<std::pair<std::string, opentelemetry::common::AttributeValue>>>> const&,
+        opentelemetry::trace::SpanKind) override
+    {
+        return SpanPtr(new opentelemetry::trace::NoopSpan(nullptr));
+    }
+#endif
+};
+
+}  // namespace
+
+/** Factory compiled only when XRPL_ENABLE_TELEMETRY is not defined.
+    The journal argument is ignored; the result is always a NullTelemetry.
+*/
+#ifndef XRPL_ENABLE_TELEMETRY
+std::unique_ptr<Telemetry>
+make_Telemetry(Telemetry::Setup const& setup, beast::Journal)
+{
+    return std::unique_ptr<Telemetry>(std::make_unique<NullTelemetry>(setup));
+}
+#endif
+
+}  // namespace telemetry
+}  // namespace xrpl
--- a/src/libxrpl/telemetry/Telemetry.cpp
+++ b/src/libxrpl/telemetry/Telemetry.cpp
@@ -0,0 +1,364 @@
+/** OpenTelemetry SDK implementation of the Telemetry interface.
+
+    Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake
+    telemetry=ON). Contains:
+
+      - TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter,
+        batch span processor, trace-ID-ratio sampler, and resource attributes.
+      - NullTelemetryOtel: no-op fallback used when telemetry is compiled in
+        but disabled at runtime (enabled=0 in config).
+      - make_Telemetry(): factory that selects the appropriate implementation.
+*/
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include <xrpl/basics/Log.h>
+#include <xrpl/telemetry/Telemetry.h>
+
+#include <opentelemetry/common/attribute_value.h>
+#include <opentelemetry/exporters/otlp/otlp_http_exporter_factory.h>
+#include <opentelemetry/exporters/otlp/otlp_http_exporter_options.h>
+#include <opentelemetry/sdk/resource/semantic_conventions.h>
+#include <opentelemetry/sdk/trace/batch_span_processor_factory.h>
+#include <opentelemetry/sdk/trace/batch_span_processor_options.h>
+#include <opentelemetry/sdk/trace/sampler.h>
+#include <opentelemetry/sdk/trace/samplers/trace_id_ratio.h>
+#include <opentelemetry/sdk/trace/tracer_provider.h>
+#include <opentelemetry/sdk/trace/tracer_provider_factory.h>
+#include <opentelemetry/trace/noop.h>
+#include <opentelemetry/trace/provider.h>
+#include <opentelemetry/trace/span_context.h>
+
+#include <map>
+
+namespace xrpl {
+namespace telemetry {
+
+namespace {
+
+namespace trace_api = opentelemetry::trace;
+namespace trace_sdk = opentelemetry::sdk::trace;
+namespace otlp_http = opentelemetry::exporter::otlp;
+namespace resource = opentelemetry::sdk::resource;
+
+/** Runtime-disabled counterpart of TelemetryImpl: make_Telemetry() picks it
+    when setup.enabled is false. Being in the anonymous namespace, it cannot
+    clash (ODR) with the NullTelemetry defined in NullTelemetry.cpp.
+*/
+class NullTelemetryOtel : public Telemetry
+{
+    /** Copy of the configuration; only consensusTraceStrategy is read back. */
+    Setup const setup_;
+
+    using SpanPtr = opentelemetry::nostd::shared_ptr<trace_api::Span>;
+    using TracerPtr = opentelemetry::nostd::shared_ptr<trace_api::Tracer>;
+
+public:
+    explicit NullTelemetryOtel(Setup const& setup) : setup_(setup)
+    {
+    }
+
+    void
+    start() override
+    {
+    }
+
+    void
+    stop() override
+    {
+    }
+
+    bool
+    isEnabled() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceTransactions() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceConsensus() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceRpc() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTracePeer() const override
+    {
+        return false;
+    }
+
+    bool
+    shouldTraceLedger() const override
+    {
+        return false;
+    }
+
+    std::string const&
+    getConsensusTraceStrategy() const override
+    {
+        return setup_.consensusTraceStrategy;
+    }
+
+    TracerPtr
+    getTracer(std::string_view) override
+    {
+        static TracerPtr const sharedNoop(new trace_api::NoopTracer());
+        return sharedNoop;
+    }
+
+    SpanPtr
+    startSpan(std::string_view, trace_api::SpanKind) override
+    {
+        return SpanPtr(new trace_api::NoopSpan(nullptr));
+    }
+
+    SpanPtr
+    startSpan(std::string_view, opentelemetry::context::Context const&, trace_api::SpanKind)
+        override
+    {
+        return SpanPtr(new trace_api::NoopSpan(nullptr));
+    }
+
+    /** Links are ignored; a fresh NoopSpan is returned as in the other overloads. */
+    SpanPtr
+    startSpan(
+        std::string_view,
+        opentelemetry::context::Context const&,
+        std::vector<std::pair<
+            trace_api::SpanContext,
+            std::vector<std::pair<std::string, opentelemetry::common::AttributeValue>>>> const&,
+        trace_api::SpanKind) override
+    {
+        return SpanPtr(new trace_api::NoopSpan(nullptr));
+    }
+};
+
+/** Telemetry implementation backed by the OpenTelemetry SDK.
+
+    start() assembles an OTLP/HTTP exporter, a batch span processor, a
+    TraceIdRatioBasedSampler and the resource attributes, then registers
+    the resulting provider globally; stop() flushes and unregisters it.
+*/
+class TelemetryImpl : public Telemetry
+{
+    /** Settings from the [telemetry] config section. Deliberately not
+        const: setServiceInstanceId() may overwrite the instance ID before
+        start() builds the OTel resource from it.
+    */
+    Setup setup_;
+
+    /** Destination for the start/stop log messages. */
+    beast::Journal const journal_;
+
+    /** SDK provider that owns the export pipeline; empty until start()
+        and again after stop(). Stored as a std::shared_ptr to the SDK type
+        so stop() can call ForceFlush(); the global registration receives
+        its own nostd::shared_ptr built from this one.
+    */
+    std::shared_ptr<trace_sdk::TracerProvider> sdkProvider_;
+
+    /** Span options carrying only the requested kind. */
+    static trace_api::StartSpanOptions
+    optionsFor(trace_api::SpanKind kind)
+    {
+        trace_api::StartSpanOptions opts;
+        opts.kind = kind;
+        return opts;
+    }
+
+public:
+    TelemetryImpl(Setup const& setup, beast::Journal journal) : setup_(setup), journal_(journal)
+    {
+    }
+
+    void
+    setServiceInstanceId(std::string const& id) override
+    {
+        setup_.serviceInstanceId = id;
+    }
+
+    void
+    start() override
+    {
+        JLOG(journal_.info()) << "Telemetry starting: endpoint=" << setup_.exporterEndpoint
+                              << " sampling=" << setup_.samplingRatio;
+
+        // Exporter: OTLP over HTTP, with a CA certificate path when TLS is on.
+        otlp_http::OtlpHttpExporterOptions httpOpts;
+        httpOpts.url = setup_.exporterEndpoint;
+        if (setup_.useTls)
+            httpOpts.ssl_ca_cert_path = setup_.tlsCertPath;
+        auto spanExporter = otlp_http::OtlpHttpExporterFactory::Create(httpOpts);
+
+        // Processor: batches spans according to the configured limits.
+        trace_sdk::BatchSpanProcessorOptions batchOpts;
+        batchOpts.max_queue_size = setup_.maxQueueSize;
+        batchOpts.schedule_delay_millis = std::chrono::milliseconds(setup_.batchDelay);
+        batchOpts.max_export_batch_size = setup_.batchSize;
+        auto spanProcessor =
+            trace_sdk::BatchSpanProcessorFactory::Create(std::move(spanExporter), batchOpts);
+
+        // Resource: attributes attached to the provider describing this node.
+        auto nodeResource = resource::Resource::Create({
+            {resource::SemanticConventions::kServiceName, setup_.serviceName},
+            {resource::SemanticConventions::kServiceVersion, setup_.serviceVersion},
+            {resource::SemanticConventions::kServiceInstanceId, setup_.serviceInstanceId},
+            {"xrpl.network.id", static_cast<int64_t>(setup_.networkId)},
+            {"xrpl.network.type", setup_.networkType},
+        });
+
+        // Sampler: ratio-based on the trace ID.
+        auto ratioSampler =
+            std::make_unique<trace_sdk::TraceIdRatioBasedSampler>(setup_.samplingRatio);
+
+        // Build the provider, keep it, and publish it as the global provider.
+        sdkProvider_ = trace_sdk::TracerProviderFactory::Create(
+            std::move(spanProcessor), nodeResource, std::move(ratioSampler));
+        trace_api::Provider::SetTracerProvider(
+            opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(sdkProvider_));
+
+        JLOG(journal_.info()) << "Telemetry started successfully";
+    }
+
+    void
+    stop() override
+    {
+        JLOG(journal_.info()) << "Telemetry stopping";
+        if (sdkProvider_)
+        {
+            // Flush pending spans, drop our reference, then swap in a no-op provider.
+            sdkProvider_->ForceFlush();
+            sdkProvider_ = nullptr;
+            opentelemetry::nostd::shared_ptr<trace_api::TracerProvider> const noopProvider(
+                new trace_api::NoopTracerProvider());
+            trace_api::Provider::SetTracerProvider(noopProvider);
+        }
+        JLOG(journal_.info()) << "Telemetry stopped";
+    }
+
+    bool
+    isEnabled() const override
+    {
+        return true;
+    }
+
+    bool
+    shouldTraceTransactions() const override
+    {
+        return setup_.traceTransactions;
+    }
+
+    bool
+    shouldTraceConsensus() const override
+    {
+        return setup_.traceConsensus;
+    }
+
+    bool
+    shouldTraceRpc() const override
+    {
+        return setup_.traceRpc;
+    }
+
+    bool
+    shouldTracePeer() const override
+    {
+        return setup_.tracePeer;
+    }
+
+    bool
+    shouldTraceLedger() const override
+    {
+        return setup_.traceLedger;
+    }
+
+    std::string const&
+    getConsensusTraceStrategy() const override
+    {
+        return setup_.consensusTraceStrategy;
+    }
+
+    /** Tracer from our SDK provider, or from the globally registered
+        provider while sdkProvider_ is empty. */
+    opentelemetry::nostd::shared_ptr<trace_api::Tracer>
+    getTracer(std::string_view name) override
+    {
+        std::string const tracerName(name);
+        if (sdkProvider_)
+            return sdkProvider_->GetTracer(tracerName);
+        return trace_api::Provider::GetTracerProvider()->GetTracer(tracerName);
+    }
+
+    opentelemetry::nostd::shared_ptr<trace_api::Span>
+    startSpan(std::string_view name, trace_api::SpanKind kind) override
+    {
+        return getTracer("rippled")->StartSpan(std::string(name), optionsFor(kind));
+    }
+
+    opentelemetry::nostd::shared_ptr<trace_api::Span>
+    startSpan(
+        std::string_view name,
+        opentelemetry::context::Context const& parentContext,
+        trace_api::SpanKind kind) override
+    {
+        auto opts = optionsFor(kind);
+        opts.parent = parentContext;
+        return getTracer("rippled")->StartSpan(std::string(name), opts);
+    }
+
+    /** Start a span with an explicit parent context and span links.
+
+        The links container is handed to Tracer::StartSpan() as-is; per the
+        OTel API it must be a container of pairs whose .first is a
+        SpanContext and whose .second is a key-value iterable.
+
+        @param name           Span name.
+        @param parentContext  The parent span's context.
+        @param links          Span links for follows-from relationships.
+        @param kind           The span kind.
+        @return A shared pointer to the new Span.
+    */
+    opentelemetry::nostd::shared_ptr<trace_api::Span>
+    startSpan(
+        std::string_view name,
+        opentelemetry::context::Context const& parentContext,
+        std::vector<std::pair<
+            trace_api::SpanContext,
+            std::vector<std::pair<std::string, opentelemetry::common::AttributeValue>>>> const&
+            links,
+        trace_api::SpanKind kind) override
+    {
+        auto opts = optionsFor(kind);
+        opts.parent = parentContext;
+        // An explicit (empty) attribute map is needed to reach the StartSpan
+        // template overload taking (name, attributes, links, options).
+        std::map<std::string, opentelemetry::common::AttributeValue> noAttrs;
+        return getTracer("rippled")->StartSpan(std::string(name), noAttrs, links, opts);
+    }
+};
+
+}  // namespace
+
+std::unique_ptr<Telemetry>
+make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal)
+{
+    if (!setup.enabled)  // compiled in, but switched off in the config
+        return std::make_unique<NullTelemetryOtel>(setup);
+    return std::make_unique<TelemetryImpl>(setup, journal);
+}
+
+}  // namespace telemetry
+}  // namespace xrpl
+
+#endif  // XRPL_ENABLE_TELEMETRY
--- a/src/libxrpl/telemetry/TelemetryConfig.cpp
+++ b/src/libxrpl/telemetry/TelemetryConfig.cpp
@@ -0,0 +1,67 @@
+/** Parser for the [telemetry] section of xrpld.cfg.
+
+    Reads configuration values from the config file and populates a
+    Telemetry::Setup struct. All options have sensible defaults so the
+    section can be minimal or omitted entirely.
+
+    See cfg/xrpld-example.cfg for the full list of available options.
+*/
+
+#include <xrpl/telemetry/Telemetry.h>
+
+#include <algorithm>
+
+namespace xrpl {
+namespace telemetry {
+
+/** Build a Telemetry::Setup from the [telemetry] section; every key is
+    optional. nodePublicKey is the default service_instance_id, version is
+    stored as the service version, and samplingRatio ends up in [0, 1]. */
+Telemetry::Setup
+setup_Telemetry(
+    Section const& section,
+    std::string const& nodePublicKey,
+    std::string const& version)
+{
+    Telemetry::Setup setup;
+    setup.enabled = section.value_or<int>("enabled", 0) != 0;
+    setup.serviceName = section.value_or<std::string>("service_name", "rippled");
+    setup.serviceVersion = version;
+    setup.serviceInstanceId = section.value_or<std::string>("service_instance_id", nodePublicKey);
+
+    setup.exporterType = section.value_or<std::string>("exporter", "otlp_http");
+    setup.exporterEndpoint =
+        section.value_or<std::string>("endpoint", "http://localhost:4318/v1/traces");
+    setup.useTls = section.value_or<int>("use_tls", 0) != 0;
+    setup.tlsCertPath = section.value_or<std::string>("tls_ca_cert", "");
+
+    // NaN fails every comparison, so std::clamp cannot sanitize it. Compare
+    // explicitly: out-of-range values are bounded, NaN gets the default 1.0.
+    auto const ratio = section.value_or<double>("sampling_ratio", 1.0);
+    setup.samplingRatio = (ratio >= 0.0 && ratio <= 1.0) ? ratio : (ratio < 0.0 ? 0.0 : 1.0);
+
+    setup.batchSize = section.value_or<std::uint32_t>("batch_size", 512u);
+    setup.batchDelay =
+        std::chrono::milliseconds{section.value_or<std::uint32_t>("batch_delay_ms", 5000u)};
+    setup.maxQueueSize = section.value_or<std::uint32_t>("max_queue_size", 2048u);
+
+    setup.traceTransactions = section.value_or<int>("trace_transactions", 1) != 0;
+    setup.traceConsensus = section.value_or<int>("trace_consensus", 1) != 0;
+    setup.traceRpc = section.value_or<int>("trace_rpc", 1) != 0;
+    setup.tracePeer = section.value_or<int>("trace_peer", 0) != 0;
+    setup.traceLedger = section.value_or<int>("trace_ledger", 1) != 0;
+
+    // Consensus tracing strategy: "deterministic" (shared trace_id derived
+    // from previousLedger.id()) or "attribute" (random trace_id with ledger_id
+    // stored as a span attribute). Unrecognised values fall back to the default.
+    setup.consensusTraceStrategy =
+        section.value_or<std::string>("consensus_trace_strategy", "deterministic");
+    if (setup.consensusTraceStrategy != "deterministic" &&
+        setup.consensusTraceStrategy != "attribute")
+        setup.consensusTraceStrategy = "deterministic";
+
+    return setup;
+}
+
+}  // namespace telemetry
+}  // namespace xrpl
--- a/src/test/app/Vault_test.cpp
+++ b/src/test/app/Vault_test.cpp
@@ -5213,7 +5213,6 @@ class Vault_test : public beast::unit_test::suite
            env.close();

            // 2. Mantissa larger than uint64 max
-            env.set_parse_failure_expected(true);
            try
            {
                tx[sfAssetsMaximum] = "18446744073709551617e5";  // uint64 max + 1
@@ -5224,9 +5223,10 @@ class Vault_test : public beast::unit_test::suite
            {
                using namespace std::string_literals;
                BEAST_EXPECT(
-                    e.what() == "invalidParamsField 'tx_json.AssetsMaximum' has invalid data."s);
+                    e.what() ==
+                    "invalidParamsField 'tx_json.AssetsMaximum' has invalid "
+                    "data."s);
            }
-            env.set_parse_failure_expected(false);
        }
    }

--- a/src/test/csf/Peer.h
+++ b/src/test/csf/Peer.h
@@ -11,6 +11,10 @@
 #include <xrpld/consensus/Consensus.h>
 #include <xrpld/consensus/Validations.h>

+#ifdef XRPL_ENABLE_TELEMETRY
+#include <xrpl/telemetry/Telemetry.h>
+#endif
+
 #include <xrpl/beast/utility/WrappedSink.h>
 #include <xrpl/protocol/PublicKey.h>

@@ -618,6 +622,22 @@ struct Peer
    {
    }

+#ifdef XRPL_ENABLE_TELEMETRY
+    /** Telemetry accessor required by the Consensus template.
+     *
+     *  Every call returns the same function-local instance, built once
+     *  from a default-constructed Setup and a null-sink journal, so that
+     *  simulation tests are expected to run without creating spans.
+     */
+    telemetry::Telemetry&
+    getTelemetry()
+    {
+        static auto const shared = make_Telemetry(
+            telemetry::Telemetry::Setup{}, beast::Journal{beast::Journal::getNullSink()});
+        return *shared;
+    }
+#endif
+
    // Share a message by broadcasting to all connected peers
    template <class M>
    void
--- a/src/test/nodestore/TestBase.h
+++ b/src/test/nodestore/TestBase.h
@@ -138,7 +138,7 @@ public:
        {
            std::shared_ptr<NodeObject> object;

-            Status const status = backend.fetch(batch[i]->getHash(), &object);
+            Status const status = backend.fetch(batch[i]->getHash().data(), &object);

            BEAST_EXPECT(status == ok);

@@ -158,7 +158,7 @@ public:
        {
            std::shared_ptr<NodeObject> object;

-            Status const status = backend.fetch(batch[i]->getHash(), &object);
+            Status const status = backend.fetch(batch[i]->getHash().data(), &object);

            BEAST_EXPECT(status == notFound);
        }
--- a/src/test/nodestore/Timing_test.cpp
+++ b/src/test/nodestore/Timing_test.cpp
@@ -315,7 +315,7 @@ public:
                    std::shared_ptr<NodeObject> obj;
                    std::shared_ptr<NodeObject> result;
                    obj = seq1_.obj(dist_(gen_));
-                    backend_.fetch(obj->getHash(), &result);
+                    backend_.fetch(obj->getHash().data(), &result);
                    suite_.expect(result && isSame(result, obj));
                }
                catch (std::exception const& e)
@@ -378,9 +378,9 @@ public:
            {
                try
                {
-                    auto const hash = seq2_.key(i);
+                    auto const key = seq2_.key(i);
                    std::shared_ptr<NodeObject> result;
-                    backend_.fetch(hash, &result);
+                    backend_.fetch(key.data(), &result);
                    suite_.expect(!result);
                }
                catch (std::exception const& e)
@@ -450,9 +450,9 @@ public:
                {
                    if (rand_(gen_) < missingNodePercent)
                    {
-                        auto const hash = seq2_.key(dist_(gen_));
+                        auto const key = seq2_.key(dist_(gen_));
                        std::shared_ptr<NodeObject> result;
-                        backend_.fetch(hash, &result);
+                        backend_.fetch(key.data(), &result);
                        suite_.expect(!result);
                    }
                    else
@@ -460,7 +460,7 @@ public:
                        std::shared_ptr<NodeObject> obj;
                        std::shared_ptr<NodeObject> result;
                        obj = seq1_.obj(dist_(gen_));
-                        backend_.fetch(obj->getHash(), &result);
+                        backend_.fetch(obj->getHash().data(), &result);
                        suite_.expect(result && isSame(result, obj));
                    }
                }
@@ -541,7 +541,7 @@ public:
                        std::shared_ptr<NodeObject> result;
                        auto const j = older_(gen_);
                        obj = seq1_.obj(j);
-                        backend_.fetch(obj->getHash(), &result);
+                        backend_.fetch(obj->getHash().data(), &result);
                        suite_.expect(result != nullptr);
                        suite_.expect(isSame(result, obj));
                    }
@@ -560,7 +560,7 @@ public:
                                std::shared_ptr<NodeObject> result;
                                auto const j = recent_(gen_);
                                obj = seq1_.obj(j);
-                                backend_.fetch(obj->getHash(), &result);
+                                backend_.fetch(obj->getHash().data(), &result);
                                suite_.expect(!result || isSame(result, obj));
                                break;
                            }
--- a/src/tests/libxrpl/CMakeLists.txt
+++ b/src/tests/libxrpl/CMakeLists.txt
@@ -53,3 +53,14 @@ if(NOT WIN32)
    target_link_libraries(xrpl.test.net PRIVATE xrpl.imports.test)
    add_dependencies(xrpl.tests xrpl.test.net)
 endif()
+
+xrpl_add_test(telemetry)
+target_link_libraries(xrpl.test.telemetry PRIVATE xrpl.imports.test)
+target_include_directories(xrpl.test.telemetry PRIVATE ${CMAKE_SOURCE_DIR}/src)
+if(telemetry)
+    target_link_libraries(
+        xrpl.test.telemetry
+        PRIVATE opentelemetry-cpp::opentelemetry-cpp
+    )
+endif()
+add_dependencies(xrpl.tests xrpl.test.telemetry)
--- a/src/tests/libxrpl/basics/MallocTrim.cpp
+++ b/src/tests/libxrpl/basics/MallocTrim.cpp
@@ -1,209 +0,0 @@
-#include <xrpl/basics/MallocTrim.h>
-
-#include <boost/predef.h>
-
-#include <gtest/gtest.h>
-
-using namespace xrpl;
-
-// cSpell:ignore statm
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-namespace xrpl::detail {
-long
-parseStatmRSSkB(std::string const& statm);
-}  // namespace xrpl::detail
-#endif
-
-TEST(MallocTrimReport, structure)
-{
-    // Test default construction
-    MallocTrimReport report;
-    EXPECT_EQ(report.supported, false);
-    EXPECT_EQ(report.trimResult, -1);
-    EXPECT_EQ(report.rssBeforeKB, -1);
-    EXPECT_EQ(report.rssAfterKB, -1);
-    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
-    EXPECT_EQ(report.minfltDelta, -1);
-    EXPECT_EQ(report.majfltDelta, -1);
-    EXPECT_EQ(report.deltaKB(), 0);
-
-    // Test deltaKB calculation - memory freed
-    report.rssBeforeKB = 1000;
-    report.rssAfterKB = 800;
-    EXPECT_EQ(report.deltaKB(), -200);
-
-    // Test deltaKB calculation - memory increased
-    report.rssBeforeKB = 500;
-    report.rssAfterKB = 600;
-    EXPECT_EQ(report.deltaKB(), 100);
-
-    // Test deltaKB calculation - no change
-    report.rssBeforeKB = 1234;
-    report.rssAfterKB = 1234;
-    EXPECT_EQ(report.deltaKB(), 0);
-}
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-TEST(parseStatmRSSkB, standard_format)
-{
-    using xrpl::detail::parseStatmRSSkB;
-
-    // Test standard format: size resident shared text lib data dt
-    // Assuming 4KB page size: resident=1000 pages = 4000 KB
-    {
-        std::string statm = "25365 1000 2377 0 0 5623 0";
-        long result = parseStatmRSSkB(statm);
-        // Note: actual result depends on system page size
-        // On most systems it's 4KB, so 1000 pages = 4000 KB
-        EXPECT_GT(result, 0);
-    }
-
-    // Test with newline
-    {
-        std::string statm = "12345 2000 1234 0 0 3456 0\n";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_GT(result, 0);
-    }
-
-    // Test with tabs
-    {
-        std::string statm = "12345\t2000\t1234\t0\t0\t3456\t0";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_GT(result, 0);
-    }
-
-    // Test zero resident pages
-    {
-        std::string statm = "25365 0 2377 0 0 5623 0";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_EQ(result, 0);
-    }
-
-    // Test with extra whitespace
-    {
-        std::string statm = "  25365   1000   2377  ";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_GT(result, 0);
-    }
-
-    // Test empty string
-    {
-        std::string statm;
-        long result = parseStatmRSSkB(statm);
-        EXPECT_EQ(result, -1);
-    }
-
-    // Test malformed data (only one field)
-    {
-        std::string statm = "25365";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_EQ(result, -1);
-    }
-
-    // Test malformed data (non-numeric)
-    {
-        std::string statm = "abc def ghi";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_EQ(result, -1);
-    }
-
-    // Test malformed data (second field non-numeric)
-    {
-        std::string statm = "25365 abc 2377";
-        long result = parseStatmRSSkB(statm);
-        EXPECT_EQ(result, -1);
-    }
-}
-#endif
-
-TEST(mallocTrim, without_debug_logging)
-{
-    beast::Journal journal{beast::Journal::getNullSink()};
-
-    MallocTrimReport report = mallocTrim("without_debug", journal);
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-    EXPECT_EQ(report.supported, true);
-    EXPECT_GE(report.trimResult, 0);
-    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
-    EXPECT_EQ(report.minfltDelta, -1);
-    EXPECT_EQ(report.majfltDelta, -1);
-#else
-    EXPECT_EQ(report.supported, false);
-    EXPECT_EQ(report.trimResult, -1);
-    EXPECT_EQ(report.rssBeforeKB, -1);
-    EXPECT_EQ(report.rssAfterKB, -1);
-    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
-    EXPECT_EQ(report.minfltDelta, -1);
-    EXPECT_EQ(report.majfltDelta, -1);
-#endif
-}
-
-TEST(mallocTrim, empty_tag)
-{
-    beast::Journal journal{beast::Journal::getNullSink()};
-    MallocTrimReport report = mallocTrim("", journal);
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-    EXPECT_EQ(report.supported, true);
-    EXPECT_GE(report.trimResult, 0);
-#else
-    EXPECT_EQ(report.supported, false);
-#endif
-}
-
-TEST(mallocTrim, with_debug_logging)
-{
-    struct DebugSink : public beast::Journal::Sink
-    {
-        DebugSink() : Sink(beast::severities::kDebug, false)
-        {
-        }
-        void
-        write(beast::severities::Severity, std::string const&) override
-        {
-        }
-        void
-        writeAlways(beast::severities::Severity, std::string const&) override
-        {
-        }
-    };
-
-    DebugSink sink;
-    beast::Journal journal{sink};
-
-    MallocTrimReport report = mallocTrim("debug_test", journal);
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-    EXPECT_EQ(report.supported, true);
-    EXPECT_GE(report.trimResult, 0);
-    EXPECT_GE(report.durationUs.count(), 0);
-    EXPECT_GE(report.minfltDelta, 0);
-    EXPECT_GE(report.majfltDelta, 0);
-#else
-    EXPECT_EQ(report.supported, false);
-    EXPECT_EQ(report.trimResult, -1);
-    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
-    EXPECT_EQ(report.minfltDelta, -1);
-    EXPECT_EQ(report.majfltDelta, -1);
-#endif
-}
-
-TEST(mallocTrim, repeated_calls)
-{
-    beast::Journal journal{beast::Journal::getNullSink()};
-
-    // Call malloc_trim multiple times to ensure it's safe
-    for (int i = 0; i < 5; ++i)
-    {
-        MallocTrimReport report = mallocTrim("iteration_" + std::to_string(i), journal);
-
-#if defined(__GLIBC__) && BOOST_OS_LINUX
-        EXPECT_EQ(report.supported, true);
-        EXPECT_GE(report.trimResult, 0);
-#else
-        EXPECT_EQ(report.supported, false);
-#endif
-    }
-}
--- a/src/tests/libxrpl/telemetry/TelemetryConfig.cpp
+++ b/src/tests/libxrpl/telemetry/TelemetryConfig.cpp
@@ -0,0 +1,111 @@
+#include <xrpl/basics/BasicConfig.h>
+#include <xrpl/telemetry/Telemetry.h>
+
+#include <gtest/gtest.h>
+
+#include <chrono>
+
+using namespace xrpl;
+
+TEST(TelemetryConfig, setup_defaults)
+{
+    telemetry::Telemetry::Setup s;
+    EXPECT_FALSE(s.enabled);
+    EXPECT_EQ(s.serviceName, "rippled");
+    EXPECT_TRUE(s.serviceVersion.empty());
+    EXPECT_TRUE(s.serviceInstanceId.empty());
+    EXPECT_EQ(s.exporterType, "otlp_http");
+    EXPECT_EQ(s.exporterEndpoint, "http://localhost:4318/v1/traces");
+    EXPECT_FALSE(s.useTls);
+    EXPECT_TRUE(s.tlsCertPath.empty());
+    EXPECT_DOUBLE_EQ(s.samplingRatio, 1.0);
+    EXPECT_EQ(s.batchSize, 512u);
+    EXPECT_EQ(s.batchDelay, std::chrono::milliseconds{5000});
+    EXPECT_EQ(s.maxQueueSize, 2048u);
+    EXPECT_EQ(s.networkId, 0u);
+    EXPECT_EQ(s.networkType, "mainnet");
+    EXPECT_TRUE(s.traceTransactions);
+    EXPECT_TRUE(s.traceConsensus);
+    EXPECT_TRUE(s.traceRpc);
+    EXPECT_FALSE(s.tracePeer);
+    EXPECT_TRUE(s.traceLedger);
+}
+
+TEST(TelemetryConfig, parse_empty_section)
+{
+    Section section;
+    auto setup = telemetry::setup_Telemetry(section, "nHUtest123", "2.0.0");
+
+    EXPECT_FALSE(setup.enabled);
+    EXPECT_EQ(setup.serviceName, "rippled");
+    EXPECT_EQ(setup.serviceVersion, "2.0.0");
+    EXPECT_EQ(setup.serviceInstanceId, "nHUtest123");
+    EXPECT_EQ(setup.exporterType, "otlp_http");
+    EXPECT_DOUBLE_EQ(setup.samplingRatio, 1.0);
+    EXPECT_TRUE(setup.traceRpc);
+    EXPECT_TRUE(setup.traceTransactions);
+    EXPECT_TRUE(setup.traceConsensus);
+    EXPECT_FALSE(setup.tracePeer);
+    EXPECT_TRUE(setup.traceLedger);
+}
+
+TEST(TelemetryConfig, parse_full_section)
+{
+    Section section;
+    section.set("enabled", "1");
+    section.set("service_name", "my-rippled");
+    section.set("service_instance_id", "custom-id");
+    section.set("exporter", "otlp_http");
+    section.set("endpoint", "http://collector:4318/v1/traces");
+    section.set("use_tls", "1");
+    section.set("tls_ca_cert", "/etc/ssl/ca.pem");
+    section.set("sampling_ratio", "0.5");
+    section.set("batch_size", "256");
+    section.set("batch_delay_ms", "3000");
+    section.set("max_queue_size", "4096");
+    section.set("trace_transactions", "0");
+    section.set("trace_consensus", "0");
+    section.set("trace_rpc", "1");
+    section.set("trace_peer", "1");
+    section.set("trace_ledger", "0");
+
+    auto setup = telemetry::setup_Telemetry(section, "nHUtest123", "2.0.0");
+
+    EXPECT_TRUE(setup.enabled);
+    EXPECT_EQ(setup.serviceName, "my-rippled");
+    EXPECT_EQ(setup.serviceInstanceId, "custom-id");
+    EXPECT_EQ(setup.exporterType, "otlp_http");
+    EXPECT_EQ(setup.exporterEndpoint, "http://collector:4318/v1/traces");
+    EXPECT_TRUE(setup.useTls);
+    EXPECT_EQ(setup.tlsCertPath, "/etc/ssl/ca.pem");
+    EXPECT_DOUBLE_EQ(setup.samplingRatio, 0.5);
+    EXPECT_EQ(setup.batchSize, 256u);
+    EXPECT_EQ(setup.batchDelay, std::chrono::milliseconds{3000});
+    EXPECT_EQ(setup.maxQueueSize, 4096u);
+    EXPECT_FALSE(setup.traceTransactions);
+    EXPECT_FALSE(setup.traceConsensus);
+    EXPECT_TRUE(setup.traceRpc);
+    EXPECT_TRUE(setup.tracePeer);
+    EXPECT_FALSE(setup.traceLedger);
+}
+
+TEST(TelemetryConfig, null_telemetry_factory)
+{
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+    EXPECT_TRUE(tel != nullptr);
+    EXPECT_FALSE(tel->isEnabled());
+    EXPECT_FALSE(tel->shouldTraceRpc());
+    EXPECT_FALSE(tel->shouldTraceTransactions());
+    EXPECT_FALSE(tel->shouldTraceConsensus());
+    EXPECT_FALSE(tel->shouldTracePeer());
+    EXPECT_FALSE(tel->shouldTraceLedger());
+
+    // start/stop should be no-ops without crashing
+    tel->start();
+    tel->stop();
+}
--- a/src/tests/libxrpl/telemetry/TraceContextPropagator.cpp
+++ b/src/tests/libxrpl/telemetry/TraceContextPropagator.cpp
@@ -0,0 +1,157 @@
+#include <gtest/gtest.h>
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include <xrpl/telemetry/TraceContextPropagator.h>
+
+#include <opentelemetry/context/context.h>
+#include <opentelemetry/nostd/span.h>
+#include <opentelemetry/trace/context.h>
+#include <opentelemetry/trace/default_span.h>
+#include <opentelemetry/trace/span_context.h>
+#include <opentelemetry/trace/trace_flags.h>
+#include <opentelemetry/trace/trace_id.h>
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+namespace trace = opentelemetry::trace;
+
+TEST(TraceContextPropagator, round_trip)
+{
+    std::uint8_t traceIdBuf[16] = {
+        0x01,
+        0x02,
+        0x03,
+        0x04,
+        0x05,
+        0x06,
+        0x07,
+        0x08,
+        0x09,
+        0x0a,
+        0x0b,
+        0x0c,
+        0x0d,
+        0x0e,
+        0x0f,
+        0x10};
+    std::uint8_t spanIdBuf[8] = {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x11, 0x22};
+
+    trace::TraceId traceId(opentelemetry::nostd::span<uint8_t const, 16>(traceIdBuf, 16));
+    trace::SpanId spanId(opentelemetry::nostd::span<uint8_t const, 8>(spanIdBuf, 8));
+    trace::TraceFlags flags(trace::TraceFlags::kIsSampled);
+    trace::SpanContext spanCtx(traceId, spanId, flags, true);
+
+    auto ctx = opentelemetry::context::Context{}.SetValue(
+        trace::kSpanKey,
+        opentelemetry::nostd::shared_ptr<trace::Span>(new trace::DefaultSpan(spanCtx)));
+
+    protocol::TraceContext proto;
+    xrpl::telemetry::injectToProtobuf(ctx, proto);
+
+    EXPECT_TRUE(proto.has_trace_id());
+    EXPECT_EQ(proto.trace_id().size(), 16u);
+    EXPECT_TRUE(proto.has_span_id());
+    EXPECT_EQ(proto.span_id().size(), 8u);
+    EXPECT_EQ(proto.trace_flags(), static_cast<uint32_t>(trace::TraceFlags::kIsSampled));
+    EXPECT_EQ(std::memcmp(proto.trace_id().data(), traceIdBuf, 16), 0);
+    EXPECT_EQ(std::memcmp(proto.span_id().data(), spanIdBuf, 8), 0);
+
+    auto extractedCtx = xrpl::telemetry::extractFromProtobuf(proto);
+    auto extractedSpan = trace::GetSpan(extractedCtx);
+    ASSERT_NE(extractedSpan, nullptr);
+
+    auto const& extracted = extractedSpan->GetContext();
+    EXPECT_TRUE(extracted.IsValid());
+    EXPECT_TRUE(extracted.IsRemote());
+    EXPECT_EQ(extracted.trace_id(), traceId);
+    EXPECT_EQ(extracted.span_id(), spanId);
+    EXPECT_TRUE(extracted.trace_flags().IsSampled());
+}
+
+TEST(TraceContextPropagator, extract_empty_protobuf)
+{
+    protocol::TraceContext proto;
+    auto ctx = xrpl::telemetry::extractFromProtobuf(proto);
+    auto span = trace::GetSpan(ctx);
+    // Assert rather than guard with `if (span)`, so the validity check
+    // below can never be silently skipped.
+    ASSERT_NE(span, nullptr);
+    EXPECT_FALSE(span->GetContext().IsValid());
+}
+
+TEST(TraceContextPropagator, extract_wrong_size_trace_id)
+{
+    protocol::TraceContext proto;
+    proto.set_trace_id(std::string(8, '\x01'));
+    proto.set_span_id(std::string(8, '\xaa'));
+
+    auto ctx = xrpl::telemetry::extractFromProtobuf(proto);
+    auto span = trace::GetSpan(ctx);
+    // An 8-byte trace_id (16 expected) must not produce a valid context.
+    // Assert on the span so this expectation is never skipped.
+    ASSERT_NE(span, nullptr);
+    EXPECT_FALSE(span->GetContext().IsValid());
+}
+
+TEST(TraceContextPropagator, extract_wrong_size_span_id)
+{
+    protocol::TraceContext proto;
+    proto.set_trace_id(std::string(16, '\x01'));
+    proto.set_span_id(std::string(4, '\xaa'));
+
+    auto ctx = xrpl::telemetry::extractFromProtobuf(proto);
+    auto span = trace::GetSpan(ctx);
+    // A 4-byte span_id (8 expected) must not produce a valid context.
+    // Assert on the span so this expectation is never skipped.
+    ASSERT_NE(span, nullptr);
+    EXPECT_FALSE(span->GetContext().IsValid());
+}
+
+TEST(TraceContextPropagator, inject_invalid_span)
+{
+    auto ctx = opentelemetry::context::Context{};
+    protocol::TraceContext proto;
+    xrpl::telemetry::injectToProtobuf(ctx, proto);
+
+    EXPECT_FALSE(proto.has_trace_id());
+    EXPECT_FALSE(proto.has_span_id());
+}
+
+TEST(TraceContextPropagator, flags_preservation)
+{
+    std::uint8_t traceIdBuf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+    std::uint8_t spanIdBuf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+    // Test with flags NOT sampled (flags = 0)
+    trace::TraceFlags flags(0);
+    trace::SpanContext spanCtx(
+        trace::TraceId(opentelemetry::nostd::span<uint8_t const, 16>(traceIdBuf, 16)),
+        trace::SpanId(opentelemetry::nostd::span<uint8_t const, 8>(spanIdBuf, 8)),
+        flags,
+        true);
+
+    auto ctx = opentelemetry::context::Context{}.SetValue(
+        trace::kSpanKey,
+        opentelemetry::nostd::shared_ptr<trace::Span>(new trace::DefaultSpan(spanCtx)));
+
+    protocol::TraceContext proto;
+    xrpl::telemetry::injectToProtobuf(ctx, proto);
+    EXPECT_EQ(proto.trace_flags(), 0u);
+
+    auto extracted = xrpl::telemetry::extractFromProtobuf(proto);
+    auto span = trace::GetSpan(extracted);
+    ASSERT_NE(span, nullptr);
+    EXPECT_FALSE(span->GetContext().trace_flags().IsSampled());
+}
+
+#else  // XRPL_ENABLE_TELEMETRY not defined
+
+TEST(TraceContextPropagator, compiles_without_telemetry)
+{
+    SUCCEED();
+}
+
+#endif  // XRPL_ENABLE_TELEMETRY
--- a/src/tests/libxrpl/telemetry/TracingMacros.cpp
+++ b/src/tests/libxrpl/telemetry/TracingMacros.cpp
@@ -0,0 +1,175 @@
+#include <xrpld/telemetry/TracingInstrumentation.h>
+
+#include <xrpl/telemetry/Telemetry.h>
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <optional>
+#include <stdexcept>
+#include <string>
+
+using namespace xrpl;
+
+TEST(TracingMacros, macros_with_null_telemetry)
+{
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+    tel->start();
+
+    // Each macro should compile and execute without crashing.
+    {
+        XRPL_TRACE_RPC(*tel, "rpc.test.command");
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.command", "test");
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
+    }
+    {
+        XRPL_TRACE_TX(*tel, "tx.test.process");
+        XRPL_TRACE_SET_ATTR("xrpl.tx.hash", "abc123");
+    }
+    {
+        XRPL_TRACE_CONSENSUS(*tel, "consensus.test");
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.mode", "proposing");
+    }
+    {
+        XRPL_TRACE_PEER(*tel, "peer.test");
+    }
+    {
+        XRPL_TRACE_LEDGER(*tel, "ledger.test");
+    }
+
+    tel->stop();
+}
+
+TEST(TracingMacros, separate_scopes)
+{
+    // Multiple macros in separate scopes should not collide on
+    // the _xrpl_guard_ variable name.
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+
+    {
+        XRPL_TRACE_RPC(*tel, "rpc.outer");
+    }
+    {
+        XRPL_TRACE_TX(*tel, "tx.inner");
+    }
+    {
+        XRPL_TRACE_CONSENSUS(*tel, "consensus.other");
+    }
+}
+
+TEST(TracingMacros, conditional_guards)
+{
+    // NullTelemetry returns false for all shouldTrace* methods.
+    // XRPL_TRACE_SET_ATTR on an empty guard must be safe.
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+
+    EXPECT_FALSE(tel->shouldTraceRpc());
+    EXPECT_FALSE(tel->shouldTraceTransactions());
+    EXPECT_FALSE(tel->shouldTraceConsensus());
+    EXPECT_FALSE(tel->shouldTracePeer());
+    EXPECT_FALSE(tel->shouldTraceLedger());
+
+    {
+        XRPL_TRACE_RPC(*tel, "should.not.create");
+        XRPL_TRACE_SET_ATTR("key", "value");
+    }
+}
+
+TEST(TracingMacros, consensus_close_time_attributes)
+{
+    // Verify the consensus.accept.apply attribute pattern compiles and
+    // doesn't crash with NullTelemetry.  Mirrors the real instrumentation
+    // in RCLConsensus::Adaptor::doAccept().
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+
+    {
+        XRPL_TRACE_CONSENSUS(*tel, "consensus.accept.apply");
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", static_cast<int64_t>(42));
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time", static_cast<int64_t>(780000000));
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time_correct", true);
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.close_resolution_ms", static_cast<int64_t>(30000));
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.state", std::string("finished"));
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.proposing", true);
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.round_time_ms", static_cast<int64_t>(3500));
+    }
+    // close_time_correct=false path (agreed to disagree)
+    {
+        XRPL_TRACE_CONSENSUS(*tel, "consensus.accept.apply");
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time_correct", false);
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.state", std::string("moved_on"));
+    }
+}
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+TEST(TracingMacros, span_guard_raii)
+{
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+
+    auto span = tel->startSpan("test.guard");
+    {
+        telemetry::SpanGuard guard(span);
+        guard.setAttribute("key", "value");
+        guard.addEvent("test_event");
+        guard.setOk();
+    }
+}
+
+TEST(TracingMacros, span_guard_move)
+{
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+
+    auto span = tel->startSpan("test.move");
+    std::optional<telemetry::SpanGuard> opt;
+    opt.emplace(span);
+    EXPECT_TRUE(opt.has_value());
+    opt.reset();
+}
+
+TEST(TracingMacros, span_guard_exception)
+{
+    telemetry::Telemetry::Setup setup;
+    setup.enabled = false;
+    beast::Journal::Sink& sink = beast::Journal::getNullSink();
+    beast::Journal j(sink);
+    auto tel = telemetry::make_Telemetry(setup, j);
+
+    auto span = tel->startSpan("test.exception");
+    {
+        telemetry::SpanGuard guard(span);
+        try
+        {
+            throw std::runtime_error("test error");
+        }
+        catch (std::exception const& e)
+        {
+            guard.recordException(e);
+        }
+    }
+}
+
+#endif  // XRPL_ENABLE_TELEMETRY
--- a/src/tests/libxrpl/telemetry/main.cpp
+++ b/src/tests/libxrpl/telemetry/main.cpp
@@ -0,0 +1,8 @@
+#include <gtest/gtest.h>
+
+int
+main(int argc, char** argv)
+{
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
--- a/src/xrpld/app/consensus/RCLConsensus.cpp
+++ b/src/xrpld/app/consensus/RCLConsensus.cpp
@@ -14,8 +14,19 @@
 #include <xrpld/consensus/LedgerTiming.h>
 #include <xrpld/overlay/Overlay.h>
 #include <xrpld/overlay/predicates.h>
+#include <xrpld/telemetry/TracingInstrumentation.h>

 #include <xrpl/basics/random.h>
+#ifdef XRPL_ENABLE_TELEMETRY
+#include <xrpl/crypto/csprng.h>
+#include <xrpl/telemetry/SpanGuard.h>
+
+#include <opentelemetry/trace/context.h>
+#include <opentelemetry/trace/default_span.h>
+#include <opentelemetry/trace/span_context.h>
+#include <opentelemetry/trace/trace_flags.h>
+#include <opentelemetry/trace/trace_id.h>
+#endif
 #include <xrpl/beast/core/LexicalCast.h>
 #include <xrpl/beast/utility/instrumentation.h>
 #include <xrpl/core/HashRouter.h>
@@ -32,6 +43,57 @@

 namespace xrpl {

+#ifdef XRPL_ENABLE_TELEMETRY
+namespace {
+
+/** Create an OTel context with a deterministic trace ID.
+ *
+ *  Derives the trace_id from the first 16 bytes of a uint256 ledger hash
+ *  so that all validators participating in the same consensus round
+ *  produce spans sharing the same trace_id. This enables cross-node
+ *  trace correlation in the backend without requiring explicit context
+ *  propagation over the peer protocol.
+ *
+ *  The span_id is randomly generated (8 bytes from the CSPRNG) so each
+ *  validator's root span is unique within the shared trace.
+ *
+ *  @param ledgerId  The previousLedger.id() hash for the consensus round.
+ *  @return An OTel Context containing a synthetic parent span with the
+ *          deterministic trace_id and a random span_id.
+ */
+opentelemetry::context::Context
+createDeterministicContext(uint256 const& ledgerId)
+{
+    namespace trace = opentelemetry::trace;
+
+    // Use first 16 bytes of the 256-bit ledger hash as trace ID.
+    // uint256::data() returns a const uint8_t* to 32 bytes in
+    // big-endian order; the first 16 are the most-significant half.
+    trace::TraceId traceId(opentelemetry::nostd::span<uint8_t const, 16>(ledgerId.data(), 16));
+
+    // Generate a random 8-byte span ID using the crypto PRNG.
+    uint8_t spanIdBytes[8];
+    crypto_prng()(spanIdBytes, sizeof(spanIdBytes));
+    trace::SpanId spanId(opentelemetry::nostd::span<uint8_t const, 8>(spanIdBytes, 8));
+
+    // Build a synthetic SpanContext that is sampled (kIsSampled, 0x01)
+    // and not remote (originated locally).
+    trace::SpanContext syntheticCtx(
+        traceId,
+        spanId,
+        trace::TraceFlags(trace::TraceFlags::kIsSampled),
+        /* remote = */ false);
+
+    // Wrap in a DefaultSpan and set on an empty Context via the
+    // standard kSpanKey used by the OTel SDK for context propagation.
+    return opentelemetry::context::Context{}.SetValue(
+        trace::kSpanKey,
+        opentelemetry::nostd::shared_ptr<trace::Span>(new trace::DefaultSpan(syntheticCtx)));
+}
+
+}  // namespace
+#endif  // XRPL_ENABLE_TELEMETRY
+
 RCLConsensus::RCLConsensus(
    Application& app,
    std::unique_ptr<FeeVote>&& feeVote,
@@ -171,6 +233,9 @@ RCLConsensus::Adaptor::share(RCLCxTx const& tx)
 void
 RCLConsensus::Adaptor::propose(RCLCxPeerPos::Proposal const& proposal)
 {
+    XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.proposal.send");
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.round", static_cast<int64_t>(proposal.proposeSeq()));
+
    JLOG(j_.trace()) << (proposal.isBowOut() ? "We bow out: " : "We propose: ")
                     << xrpl::to_string(proposal.prevLedger()) << " -> "
                     << xrpl::to_string(proposal.position());
@@ -273,6 +338,11 @@ RCLConsensus::Adaptor::onClose(
    NetClock::time_point const& closeTime,
    ConsensusMode mode) -> Result
 {
+    XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.ledger_close");
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.ledger.seq", static_cast<int64_t>(ledger.ledger_->header().seq + 1));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.mode", to_string(mode).c_str());
+
    bool const wrongLCL = mode == ConsensusMode::wrongLedger;
    bool const proposing = mode == ConsensusMode::proposing;

@@ -381,6 +451,11 @@ RCLConsensus::Adaptor::onAccept(
    Json::Value&& consensusJson,
    bool const validating)
 {
+    XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.accept");
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.proposers", static_cast<int64_t>(result.proposers));
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.round_time_ms", static_cast<int64_t>(result.roundTime.read().count()));
+
    app_.getJobQueue().addJob(
        jtACCEPT,
        "AcceptLedger",
@@ -432,6 +507,57 @@ RCLConsensus::Adaptor::doAccept(
        closeTimeCorrect = true;
    }

+    /// @note This method runs on a JobQueue worker thread (jtACCEPT), not the
+    /// consensus thread where roundSpan_ is active. OTel's thread-local
+    /// context propagation does NOT cross thread boundaries, so the
+    /// consensus.accept.apply span below is standalone — it is NOT a child
+    /// of consensus.round. Cross-thread context propagation for this path
+    /// is a future enhancement (Phase 4b).
+
+    // Trace the ledger application phase with close time details.
+    // This span runs on the jtACCEPT job queue thread (posted by onAccept),
+    // separate from the consensus.accept span which fires synchronously in
+    // onAccept.  It captures the agreed-upon close time, whether validators
+    // converged on it (per avCT_CONSENSUS_PCT), the consensus outcome,
+    // parent close time, this node's own close time proposal, the number
+    // of distinct vote bins, and the resolution adaptation direction.
+    XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.accept.apply");
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", static_cast<int64_t>(prevLedger.seq() + 1));
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.close_time",
+        static_cast<int64_t>(consensusCloseTime.time_since_epoch().count()));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time_correct", closeTimeCorrect);
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.close_resolution_ms",
+        static_cast<int64_t>(
+            std::chrono::duration_cast<std::chrono::milliseconds>(closeResolution).count()));
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.state", std::string(consensusFail ? "moved_on" : "finished"));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.proposing", proposing);
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.round_time_ms", static_cast<int64_t>(result.roundTime.read().count()));
+    // Parent ledger's close time — enables computing close-time deltas across
+    // consecutive rounds without correlating separate spans.
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.parent_close_time",
+        static_cast<int64_t>(prevLedger.closeTime().time_since_epoch().count()));
+    // This node's own proposed close time before consensus voting.
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.close_time_self",
+        static_cast<int64_t>(rawCloseTimes.self.time_since_epoch().count()));
+    // Number of distinct close-time vote bins from peer proposals.
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.close_time_vote_bins", static_cast<int64_t>(rawCloseTimes.peers.size()));
+    // Whether close-time resolution increased (coarser), decreased (finer),
+    // or stayed the same relative to the previous ledger.
+    {
+        auto const prevRes = prevLedger.closeTimeResolution();
+        std::string dir = (closeResolution > prevRes) ? "increased"
+            : (closeResolution < prevRes)             ? "decreased"
+                                                      : "unchanged";
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.resolution_direction", std::move(dir));
+    }
+
    JLOG(j_.debug()) << "Report: Prop=" << (proposing ? "yes" : "no")
                     << " val=" << (validating_ ? "yes" : "no")
                     << " corLCL=" << (haveCorrectLCL ? "yes" : "no")
@@ -749,6 +875,17 @@ RCLConsensus::Adaptor::buildLCL(
 void
 RCLConsensus::Adaptor::validate(RCLCxLedger const& ledger, RCLTxSet const& txns, bool proposing)
 {
+    /// @note This method is called from doAccept(), which runs on a JobQueue
+    /// worker thread (jtACCEPT). The consensus.validation.send span is
+    /// therefore standalone — NOT a child of consensus.round. Instead,
+    /// createValidationSpan() attaches a span link to the round span to
+    /// record the relationship without parent-child context propagation.
+#ifdef XRPL_ENABLE_TELEMETRY
+    std::optional<telemetry::SpanGuard> _xrpl_guard_ = createValidationSpan();
+#endif
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", static_cast<int64_t>(ledger.seq()));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.proposing", proposing);
+
    using namespace std::chrono_literals;

    auto validationTime = app_.timeKeeper().closeTime();
@@ -836,6 +973,13 @@ RCLConsensus::Adaptor::validate(RCLCxLedger const& ledger, RCLTxSet const& txns,
 void
 RCLConsensus::Adaptor::onModeChange(ConsensusMode before, ConsensusMode after)
 {
+    // Trace mode transitions as short-lived spans for visibility in the
+    // trace backend. Each transition (e.g. observing -> proposing) appears
+    // as a child of the current consensus.round span.
+    XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.mode_change");
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.old", to_string(before).c_str());
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.new", to_string(after).c_str());
+
    JLOG(j_.info()) << "Consensus mode change before=" << to_string(before)
                    << ", after=" << to_string(after);

@@ -958,6 +1102,10 @@ RCLConsensus::Adaptor::preStartRound(RCLCxLedger const& prevLgr, hash_set<NodeID
    if (!nowTrusted.empty())
        nUnlVote_.newValidators(prevLgr.seq() + 1, nowTrusted);

+#ifdef XRPL_ENABLE_TELEMETRY
+    startRoundTracing(prevLgr);
+#endif
+
    // propose only if we're in sync with the network (and validating)
    return validating_ && synced;
 }
@@ -1001,6 +1149,104 @@ RCLConsensus::Adaptor::updateOperatingMode(std::size_t const positions) const
        app_.getOPs().setMode(OperatingMode::CONNECTED);
 }

+#ifdef XRPL_ENABLE_TELEMETRY
+telemetry::Telemetry&
+RCLConsensus::Adaptor::getTelemetry()
+{
+    return app_.getTelemetry();  // forwards to the Application's telemetry accessor
+}
+
+void
+RCLConsensus::Adaptor::startRoundTracing(RCLCxLedger const& prevLgr)
+{
+    // Begin a new "consensus.round" span. First remember the previous round's
+    // context (used for the span link below) and end the previous round span.
+    if (roundSpan_)
+    {
+        prevRoundContext_ = roundSpan_->context();
+        roundSpan_.reset();
+    }
+
+    auto& tel = app_.getTelemetry();
+    if (!tel.shouldTraceConsensus())
+        return;
+
+    auto const& strategy = tel.getConsensusTraceStrategy();
+
+    // Build a follows-from span link to the previous round, if available.
+    // The link attribute value must be a string literal: AttributeValue is
+    // non-owning, so a temporary std::string would dangle before startSpan().
+    using LinkAttr = std::pair<std::string, opentelemetry::common::AttributeValue>;
+    using SpanLink = std::pair<opentelemetry::trace::SpanContext, std::vector<LinkAttr>>;
+    std::vector<SpanLink> links;
+
+    auto prevSpan = opentelemetry::trace::GetSpan(prevRoundContext_);
+    if (prevSpan && prevSpan->GetContext().IsValid())
+    {
+        links.emplace_back(
+            prevSpan->GetContext(),
+            std::vector<LinkAttr>{{"xrpl.link.type", "follows_from"}});
+    }
+
+    if (strategy == "deterministic")
+    {
+        // Derive trace_id from ledger hash so all validators in this
+        // round produce spans under the same trace.
+        auto parentCtx = createDeterministicContext(prevLgr.id());
+        roundSpan_.emplace(tel.startSpan("consensus.round", parentCtx, links));
+    }
+    else
+    {
+        // "attribute" strategy: random trace_id, correlation via
+        // the xrpl.consensus.ledger_id attribute.
+        if (links.empty())
+            roundSpan_.emplace(tel.startSpan("consensus.round"));
+        else
+        {
+            // Use an empty context as parent (new root trace).
+            roundSpan_.emplace(
+                tel.startSpan("consensus.round", opentelemetry::context::Context{}, links));
+        }
+    }
+
+    // Set standard attributes on the round span.
+    roundSpan_->setAttribute("xrpl.consensus.ledger_id", to_string(prevLgr.id()).c_str());
+    roundSpan_->setAttribute("xrpl.consensus.ledger.seq", static_cast<int64_t>(prevLgr.seq() + 1));
+    roundSpan_->setAttribute("xrpl.consensus.mode", to_string(mode_.load()).c_str());
+    roundSpan_->setAttribute("xrpl.consensus.trace_strategy", strategy.c_str());
+    roundSpan_->setAttribute("xrpl.consensus.round_id", static_cast<int64_t>(prevLgr.seq() + 1));
+
+    // Snapshot the SpanContext for cross-thread use by createValidationSpan().
+    roundSpanContext_ = roundSpan_->span().GetContext();
+}
+
+std::optional<telemetry::SpanGuard>
+RCLConsensus::Adaptor::createValidationSpan()
+{
+    // No span at all when consensus tracing is switched off.
+    auto& tel = app_.getTelemetry();
+    if (!tel.shouldTraceConsensus())
+        return std::nullopt;
+
+    using LinkAttr = std::pair<std::string, opentelemetry::common::AttributeValue>;
+    using SpanLink = std::pair<opentelemetry::trace::SpanContext, std::vector<LinkAttr>>;
+
+    // Relate this span to the round span through a link (no link
+    // attributes). The SpanContext comes from the roundSpanContext_
+    // snapshot written by startRoundTracing(), so roundSpan_ itself is
+    // never touched here.
+    std::vector<SpanLink> links;
+    bool const haveRound = roundSpanContext_.has_value() && roundSpanContext_->IsValid();
+    if (haveRound)
+        links.emplace_back(*roundSpanContext_, std::vector<LinkAttr>{});
+
+    // Parent context is whatever is current on the calling thread.
+    auto const parent = opentelemetry::context::RuntimeContext::GetCurrent();
+    return telemetry::SpanGuard(
+        tel.startSpan("consensus.validation.send", parent, links));
+}
+#endif
+
 void
 RCLConsensus::startRound(
    NetClock::time_point const& now,
--- a/src/xrpld/app/consensus/RCLConsensus.h
+++ b/src/xrpld/app/consensus/RCLConsensus.h
@@ -13,9 +13,16 @@
 #include <xrpl/protocol/RippleLedgerHash.h>
 #include <xrpl/shamap/SHAMap.h>

+#ifdef XRPL_ENABLE_TELEMETRY
+#include <xrpl/telemetry/SpanGuard.h>
+
+#include <opentelemetry/context/context.h>
+#endif
+
 #include <atomic>
 #include <memory>
 #include <mutex>
+#include <optional>
 #include <set>
 #include <sstream>
 #include <string>
@@ -27,6 +34,10 @@ class LocalTxs;
 class LedgerMaster;
 class ValidatorKeys;

+namespace telemetry {
+class Telemetry;
+}  // namespace telemetry
+
 /** Manages the generic consensus algorithm for use by the RCL.
 */
 class RCLConsensus
@@ -68,6 +79,34 @@ class RCLConsensus
        RCLCensorshipDetector<TxID, LedgerIndex> censorshipDetector_;
        NegativeUNLVote nUnlVote_;

+#ifdef XRPL_ENABLE_TELEMETRY
+        /** Span for the current consensus round.
+         *
+         *  Created by startRoundTracing() (called from preStartRound()) and
+         *  reset when the next round begins.  With the "deterministic" trace
+         *  strategy the span context is built from the previous ledger's id
+         *  via createDeterministicContext(); otherwise a fresh span is started.
+         */
+        std::optional<telemetry::SpanGuard> roundSpan_;
+
+        /** Context captured from the previous consensus round.
+         *
+         *  Used to create span links (follows-from) between consecutive
+         *  rounds, establishing a causal chain in the trace backend.
+         *  Default-constructed (empty) until the first round completes.
+         */
+        opentelemetry::context::Context prevRoundContext_;
+
+        /** SpanContext snapshot of the current round span.
+         *
+         *  Captured in startRoundTracing() as a lightweight value-type copy
+         *  so that createValidationSpan() — which runs on the jtACCEPT
+         *  worker thread — can build span links without accessing roundSpan_
+         *  across threads.
+         */
+        std::optional<opentelemetry::trace::SpanContext> roundSpanContext_;
+#endif
+
    public:
        using Ledger_t = RCLCxLedger;
        using NodeID_t = NodeID;
@@ -156,6 +195,51 @@ class RCLConsensus
            return parms_;
        }

+#ifdef XRPL_ENABLE_TELEMETRY
+        /** Provide access to the telemetry subsystem for consensus tracing.
+         *
+         * Called by Consensus.h template methods (phaseEstablish,
+         * updateOurPositions, haveConsensus) to create child spans under the
+         * consensus round.  When XRPL_ENABLE_TELEMETRY is not defined, the
+         * macros in Consensus.h expand to no-ops and this method is never
+         * called.
+         *
+         * @return Reference to the application's Telemetry instance.
+         */
+        telemetry::Telemetry&
+        getTelemetry();
+
+        /** Set up the consensus round span and link it to the previous round.
+         *
+         * Extracted from preStartRound() to keep business logic free of
+         * telemetry details.  Saves the previous round's OTel context for
+         * span-link construction, ends the old round span, and creates a
+         * new "consensus.round" span.  Depending on the configured trace
+         * strategy the trace_id is either deterministic (derived from
+         * @p prevLgr hash) or random.
+         *
+         * @param prevLgr  The ledger that will be the prior ledger for the
+         *                 new round — used to derive deterministic trace IDs
+         *                 and to set standard span attributes.
+         */
+        void
+        startRoundTracing(RCLCxLedger const& prevLgr);
+
+        /** Create the "consensus.validation.send" span with a link to the
+         *  current round span.
+         *
+         * Extracted from validate() to keep the validation business logic
+         * free of span-construction boilerplate.  The returned SpanGuard
+         * must be assigned to a local `_xrpl_guard_` so that subsequent
+         * XRPL_TRACE_SET_ATTR calls in the caller can reference it.
+         *
+         * @return An engaged optional SpanGuard if tracing is active,
+         *         std::nullopt otherwise.
+         */
+        std::optional<telemetry::SpanGuard>
+        createValidationSpan();
+#endif
+
    private:
        //---------------------------------------------------------------------
        // The following members implement the generic Consensus requirements
--- a/src/xrpld/app/main/Application.cpp
+++ b/src/xrpld/app/main/Application.cpp
@@ -31,7 +31,6 @@
 #include <xrpld/shamap/NodeFamily.h>

 #include <xrpl/basics/ByteUtilities.h>
-#include <xrpl/basics/MallocTrim.h>
 #include <xrpl/basics/ResolverAsio.h>
 #include <xrpl/basics/random.h>
 #include <xrpl/beast/asio/io_latency_probe.h>
@@ -52,6 +51,7 @@
 #include <xrpl/resource/Fees.h>
 #include <xrpl/server/LoadFeeTrack.h>
 #include <xrpl/server/Wallet.h>
+#include <xrpl/telemetry/Telemetry.h>
 #include <xrpl/tx/apply.h>

 #include <boost/algorithm/string/predicate.hpp>
@@ -147,6 +147,7 @@ public:

    beast::Journal m_journal;
    std::unique_ptr<perf::PerfLog> perfLog_;
+    std::unique_ptr<telemetry::Telemetry> telemetry_;
    Application::MutexType m_masterMutex;

    // Required by the SHAMapStore
@@ -258,6 +259,14 @@ public:
                  logs_->journal("PerfLog"),
                  [this] { signalStop("PerfLog"); }))

+        , telemetry_(
+              telemetry::make_Telemetry(
+                  telemetry::setup_Telemetry(
+                      config_->section("telemetry"),
+                      "",  // Updated later via setServiceInstanceId()
+                      BuildInfo::getVersionString()),
+                  logs_->journal("Telemetry")))
+
        , m_txMaster(*this)

        , m_collectorManager(
@@ -624,6 +633,12 @@ public:
        return *perfLog_;
    }

+    telemetry::Telemetry&
+    getTelemetry() override
+    {
+        return *telemetry_;  // telemetry_ is set in the constructor's initializer list
+    }
+
    NodeCache&
    getTempNodeCache() override
    {
@@ -1060,8 +1075,6 @@ public:
                                    << "; size after: " << cachedSLEs_.size();
        }

-        mallocTrim("doSweep", m_journal);
-
        // Set timer to do another sweep later.
        setSweepTimer();
    }
@@ -1267,6 +1280,14 @@ ApplicationImp::setup(boost::program_options::variables_map const& cmdline)

    nodeIdentity_ = getNodeIdentity(*this, cmdline);

+    // Now that the node identity is known, inject it into the telemetry
+    // resource attributes — but only if the user didn't already set a
+    // custom service_instance_id in [telemetry].  The Telemetry object
+    // was constructed with an empty serviceInstanceId because
+    // nodeIdentity_ is not available in the member initializer list.
+    if (!config_->section("telemetry").exists("service_instance_id"))
+        telemetry_->setServiceInstanceId(toBase58(TokenType::NodePublic, nodeIdentity_->first));
+
    if (!cluster_->load(config().section(SECTION_CLUSTER_NODES)))
    {
        JLOG(m_journal.fatal()) << "Invalid entry in cluster configuration.";
@@ -1479,6 +1500,7 @@ ApplicationImp::start(bool withTimers)

    ledgerCleaner_->start();
    perfLog_->start();
+    telemetry_->start();
 }

 void
@@ -1569,6 +1591,7 @@ ApplicationImp::run()
    ledgerCleaner_->stop();
    m_nodeStore->stop();
    perfLog_->stop();
+    telemetry_->stop();

    JLOG(m_journal.info()) << "Done.";
 }
--- a/src/xrpld/app/misc/NetworkOPs.cpp
+++ b/src/xrpld/app/misc/NetworkOPs.cpp
@@ -29,6 +29,7 @@
 #include <xrpld/rpc/DeliveredAmount.h>
 #include <xrpld/rpc/MPTokenIssuanceID.h>
 #include <xrpld/rpc/ServerHandler.h>
+#include <xrpld/telemetry/TracingInstrumentation.h>

 #include <xrpl/basics/UptimeClock.h>
 #include <xrpl/basics/mulDiv.h>
@@ -1225,6 +1226,10 @@ NetworkOPsImp::processTransaction(
    bool bLocal,
    FailHard failType)
 {
+    XRPL_TRACE_TX(registry_.getTelemetry(), "tx.process");
+    XRPL_TRACE_SET_ATTR("xrpl.tx.hash", to_string(transaction->getID()).c_str());
+    XRPL_TRACE_SET_ATTR("xrpl.tx.local", bLocal);
+
    auto ev = m_job_queue.makeLoadEvent(jtTXN_PROC, "ProcessTXN");

    // preProcessTransaction can change our pointer
@@ -1233,10 +1238,12 @@ NetworkOPsImp::processTransaction(

    if (bLocal)
    {
+        XRPL_TRACE_SET_ATTR("xrpl.tx.path", "sync");
        doTransactionSync(transaction, bUnlimited, failType);
    }
    else
    {
+        XRPL_TRACE_SET_ATTR("xrpl.tx.path", "async");
        doTransactionAsync(transaction, bUnlimited, failType);
    }
 }
--- a/src/xrpld/consensus/Consensus.h
+++ b/src/xrpld/consensus/Consensus.h
@@ -11,6 +11,12 @@
 #include <xrpl/beast/utility/Journal.h>
 #include <xrpl/json/json_writer.h>

+#include <xrpld/telemetry/TracingInstrumentation.h>
+
+#ifdef XRPL_ENABLE_TELEMETRY
+#include <xrpl/telemetry/SpanGuard.h>
+#endif
+
 #include <algorithm>
 #include <chrono>
 #include <deque>
@@ -601,6 +607,44 @@ private:
    // nodes that have bowed out of this consensus process
    hash_set<NodeID_t> deadNodes_;

+#ifdef XRPL_ENABLE_TELEMETRY
+    /** Span for the establish phase of consensus.
+     *
+     *  Created when the ledger closes and we enter phaseEstablish;
+     *  cleared (ended) when consensus is reached and we move to the
+     *  accept phase. This span is a child of the round span that
+     *  lives in the Adaptor (via thread-local OTel context propagation).
+     */
+    std::optional<xrpl::telemetry::SpanGuard> establishSpan_;
+
+    /** Create the establish-phase span if not yet active.
+     *
+     * Called on each phaseEstablish() invocation. Creates a
+     * "consensus.establish" span on the first call and stores it in
+     * establishSpan_.  Subsequent calls are no-ops while the span is
+     * still live.
+     */
+    void
+    startEstablishTracing();
+
+    /** Update establish span attributes for the current iteration.
+     *
+     * Overwrites convergence metrics (converge_percent, establish_count,
+     * proposers) on each call so the final span always reflects the last
+     * state before consensus was reached.
+     */
+    void
+    updateEstablishTracing();
+
+    /** End the establish span when transitioning to the accepted phase.
+     *
+     * Resets establishSpan_, which triggers the SpanGuard destructor and
+     * ends the span.
+     */
+    void
+    endEstablishTracing();
+#endif
+
    // Journal for debugging
    beast::Journal const j_;
 };
@@ -1301,6 +1345,10 @@ Consensus<Adaptor>::phaseEstablish(std::unique_ptr<std::stringstream> const& clo
    // can only establish consensus if we already took a stance
    XRPL_ASSERT(result_, "xrpl::Consensus::phaseEstablish : result is set");

+#ifdef XRPL_ENABLE_TELEMETRY
+    startEstablishTracing();
+#endif
+
    ++peerUnchangedCounter_;
    ++establishCounter_;

@@ -1318,6 +1366,10 @@ Consensus<Adaptor>::phaseEstablish(std::unique_ptr<std::stringstream> const& clo
               << "previous round duration: " << prevRoundTime_.count() << "ms, "
               << "avMIN_CONSENSUS_TIME: " << parms.avMIN_CONSENSUS_TIME.count() << "ms. ";

+#ifdef XRPL_ENABLE_TELEMETRY
+    updateEstablishTracing();
+#endif
+
    // Give everyone a chance to take an initial position
    if (result_->roundTime.read() < parms.ledgerMIN_CONSENSUS)
    {
@@ -1345,6 +1397,11 @@ Consensus<Adaptor>::phaseEstablish(std::unique_ptr<std::stringstream> const& clo
    adaptor_.updateOperatingMode(currPeerPositions_.size());
    prevProposers_ = currPeerPositions_.size();
    prevRoundTime_ = result_->roundTime.read();
+
+#ifdef XRPL_ENABLE_TELEMETRY
+    endEstablishTracing();
+#endif
+
    phase_ = ConsensusPhase::accepted;
    JLOG(j_.debug()) << "transitioned to ConsensusPhase::accepted";
    adaptor_.onAccept(
@@ -1357,6 +1414,40 @@ Consensus<Adaptor>::phaseEstablish(std::unique_ptr<std::stringstream> const& clo
        adaptor_.validating());
 }

+#ifdef XRPL_ENABLE_TELEMETRY
+template <class Adaptor>
+void
+Consensus<Adaptor>::startEstablishTracing()
+{
+    // Span already live, or consensus tracing disabled: nothing to do.
+    if (establishSpan_ || !adaptor_.getTelemetry().shouldTraceConsensus())
+        return;
+    establishSpan_.emplace(adaptor_.getTelemetry().startSpan("consensus.establish"));
+}
+
+template <class Adaptor>
+void
+Consensus<Adaptor>::updateEstablishTracing()
+{
+    if (!establishSpan_)
+        return;  // establish span not active
+
+    auto& span = *establishSpan_;
+    // Rewritten on every call, so the span carries the most recent values.
+    span.setAttribute("xrpl.consensus.converge_percent", static_cast<int64_t>(convergePercent_));
+    span.setAttribute("xrpl.consensus.establish_count", static_cast<int64_t>(establishCounter_));
+    span.setAttribute(
+        "xrpl.consensus.proposers", static_cast<int64_t>(currPeerPositions_.size()));
+}
+
+template <class Adaptor>
+void
+Consensus<Adaptor>::endEstablishTracing()
+{
+    establishSpan_.reset();  // destroys the held SpanGuard, if any; no-op when empty
+}
+#endif  // XRPL_ENABLE_TELEMETRY
+
 template <class Adaptor>
 void
 Consensus<Adaptor>::closeLedger(std::unique_ptr<std::stringstream> const& clog)
@@ -1419,6 +1510,31 @@ Consensus<Adaptor>::updateOurPositions(std::unique_ptr<std::stringstream> const&
 {
    // We must have a position if we are updating it
    XRPL_ASSERT(result_, "xrpl::Consensus::updateOurPositions : result is set");
+
+    /// @brief Scoped span tracking a single position-update pass.
+    /// Records the number of active disputes, current convergence
+    /// percentage, and total proposers. Dispute resolution events are
+    /// recorded as span events with the affected transaction ID and vote.
+    XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.update_positions");
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.disputes_count", static_cast<int64_t>(result_->disputes.size()));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.converge_percent", static_cast<int64_t>(convergePercent_));
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.proposers_total", static_cast<int64_t>(currPeerPositions_.size()));
+
+    /// Count peers that agree with our current position and record as
+    /// an attribute on the update_positions span.
+    {
+        int agreedCount = 0;
+        auto const ourPos = result_->position.position();
+        for (auto const& [nodeId, peerPos] : currPeerPositions_)
+        {
+            if (peerPos.proposal().position() == ourPos)
+                ++agreedCount;
+        }
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.proposers_agreed", static_cast<int64_t>(agreedCount));
+    }
+
    ConsensusParms const& parms = adaptor_.parms();

    // Compute a cutoff time
@@ -1465,6 +1581,15 @@ Consensus<Adaptor>::updateOurPositions(std::unique_ptr<std::stringstream> const&
            if (dispute.updateVote(
                    convergePercent_, mode_.get() == ConsensusMode::proposing, parms))
            {
+                /// Record dispute resolution event with transaction ID,
+                /// new vote direction, and current yay/nay counts.
+                XRPL_TRACE_ADD_EVENT(
+                    "dispute.resolve",
+                    {{"xrpl.dispute.tx_id", to_string(txId)},
+                     {"xrpl.dispute.our_vote", dispute.getOurVote()},
+                     {"xrpl.dispute.yays", static_cast<int64_t>(dispute.getYays())},
+                     {"xrpl.dispute.nays", static_cast<int64_t>(dispute.getNays())}});
+
                if (!mutableSet)
                    mutableSet.emplace(result_->txns);

@@ -1600,6 +1725,12 @@ Consensus<Adaptor>::haveConsensus(std::unique_ptr<std::stringstream> const& clog
    // Must have a stance if we are checking for consensus
    XRPL_ASSERT(result_, "xrpl::Consensus::haveConsensus : has result");

+    /// @brief Scoped span tracking a single consensus-check pass.
+    /// Records the number of agreeing/disagreeing peers, convergence
+    /// percentage, and the resulting ConsensusState (Yes/No/MovedOn/Expired).
+    /// Also captures the current avalanche threshold percentage.
+    XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.check");
+
    // CHECKME: should possibly count unacquired TX sets as disagreeing
    int agree = 0, disagree = 0;

@@ -1620,11 +1751,22 @@ Consensus<Adaptor>::haveConsensus(std::unique_ptr<std::stringstream> const& clog
            ++disagree;
        }
    }
+
+    /// Record agreement counts and convergence progress on the span.
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.agree_count", static_cast<int64_t>(agree));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.disagree_count", static_cast<int64_t>(disagree));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.converge_percent", static_cast<int64_t>(convergePercent_));
+
    auto currentFinished = adaptor_.proposersFinished(previousLedger_, prevLedgerID_);

    JLOG(j_.debug()) << "Checking for TX consensus: agree=" << agree << ", disagree=" << disagree;

    ConsensusParms const& parms = adaptor_.parms();
+
+    /// Record the minimum consensus threshold percentage (typically 80%).
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.threshold_percent", static_cast<int64_t>(parms.minCONSENSUS_PCT));
+
    // Stalling is BAD. It means that we have a consensus on the close time, so
    // peers are talking, but we have disputed transactions that peers are
    // unable or unwilling to come to agreement on one way or the other.
@@ -1657,6 +1799,27 @@ Consensus<Adaptor>::haveConsensus(std::unique_ptr<std::stringstream> const& clog
        j_,
        clog);

+    /// Record the consensus check outcome as a string attribute.
+    {
+        char const* stateStr = "unknown";
+        switch (result_->state)
+        {
+            case ConsensusState::No:
+                stateStr = "no";
+                break;
+            case ConsensusState::MovedOn:
+                stateStr = "moved_on";
+                break;
+            case ConsensusState::Yes:
+                stateStr = "yes";
+                break;
+            case ConsensusState::Expired:
+                stateStr = "expired";
+                break;
+        }
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.result", stateStr);
+    }
+
    if (result_->state == ConsensusState::No)
    {
        CLOG(clog) << "No consensus. ";
--- a/src/xrpld/consensus/DisputedTx.h
+++ b/src/xrpld/consensus/DisputedTx.h
@@ -58,6 +58,20 @@ public:
        return ourVote_;
    }

+    //! Number of peers voting to include the transaction.
+    [[nodiscard]] int
+    getYays() const
+    {
+        return yays_;
+    }
+
+    //! Number of peers voting to exclude the transaction.
+    [[nodiscard]] int
+    getNays() const
+    {
+        return nays_;
+    }
+
    //! Are we and our peers "stalled" where we probably won't change
    //! our vote?
    bool
--- a/src/xrpld/overlay/detail/PeerImp.cpp
+++ b/src/xrpld/overlay/detail/PeerImp.cpp
@@ -8,6 +8,7 @@
 #include <xrpld/overlay/Cluster.h>
 #include <xrpld/overlay/detail/PeerImp.h>
 #include <xrpld/overlay/detail/Tuning.h>
+#include <xrpld/telemetry/TracingInstrumentation.h>

 #include <xrpl/basics/UptimeClock.h>
 #include <xrpl/basics/base64.h>
@@ -1354,6 +1355,9 @@ PeerImp::handleTransaction(
    bool eraseTxQueue,
    bool batch)
 {
+    XRPL_TRACE_TX(app_.getTelemetry(), "tx.receive");
+    XRPL_TRACE_SET_ATTR("xrpl.peer.id", static_cast<int64_t>(id_));
+
    XRPL_ASSERT(eraseTxQueue != batch, ("xrpl::PeerImp::handleTransaction : valid inputs"));
    if (tracking_.load() == Tracking::diverged)
        return;
@@ -1372,6 +1376,7 @@ PeerImp::handleTransaction(
    {
        auto stx = std::make_shared<STTx const>(sit);
        uint256 txID = stx->getTransactionID();
+        XRPL_TRACE_SET_ATTR("xrpl.tx.hash", to_string(txID).c_str());

        // Charge strongly for attempting to relay a txn with tfInnerBatchTxn
        // LCOV_EXCL_START
@@ -1405,9 +1410,11 @@ PeerImp::handleTransaction(

        if (!app_.getHashRouter().shouldProcess(txID, id_, flags, tx_interval))
        {
+            XRPL_TRACE_SET_ATTR("xrpl.tx.suppressed", true);
            // we have seen this transaction recently
            if (any(flags & HashRouterFlags::BAD))
            {
+                XRPL_TRACE_SET_ATTR("xrpl.tx.status", "known_bad");
                fee_.update(Resource::feeUselessData, "known bad");
                JLOG(p_journal_.debug()) << "Ignoring known bad tx " << txID;
            }
--- a/src/xrpld/rpc/detail/RPCHandler.cpp
+++ b/src/xrpld/rpc/detail/RPCHandler.cpp
@@ -8,6 +8,7 @@
 #include <xrpld/rpc/Role.h>
 #include <xrpld/rpc/detail/Handler.h>
 #include <xrpld/rpc/detail/Tuning.h>
+#include <xrpld/telemetry/TracingInstrumentation.h>

 #include <xrpl/basics/Log.h>
 #include <xrpl/core/JobQueue.h>
@@ -157,6 +158,11 @@ template <class Object, class Method>
 Status
 callMethod(JsonContext& context, Method method, std::string const& name, Object& result)
 {
+    XRPL_TRACE_RPC(context.app.getTelemetry(), "rpc.command." + name);
+    XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name.c_str());
+    XRPL_TRACE_SET_ATTR("xrpl.rpc.version", static_cast<int64_t>(context.apiVersion));
+    XRPL_TRACE_SET_ATTR("xrpl.rpc.role", (context.role == Role::ADMIN ? "admin" : "user"));
+
    static std::atomic<std::uint64_t> requestId{0};
    auto& perfLog = context.app.getPerfLog();
    std::uint64_t const curId = ++requestId;
@@ -169,15 +175,22 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
        auto ret = method(context, result);
        auto end = std::chrono::system_clock::now();

+        [[maybe_unused]] auto const durationMs =
+            std::chrono::duration<double, std::milli>(end - start).count();
        JLOG(context.j.debug()) << "RPC call " << name << " completed in "
                                << ((end - start).count() / 1000000000.0) << "seconds";
        perfLog.rpcFinish(name, curId);
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.duration_ms", durationMs);
        return ret;
    }
    catch (std::exception& e)
    {
        perfLog.rpcError(name, curId);
        JLOG(context.j.info()) << "Caught throw: " << e.what();
+        XRPL_TRACE_EXCEPTION(e);
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error");
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.error_message", e.what());

        if (context.loadType == Resource::feeReferenceRPC)
            context.loadType = Resource::feeExceptionRPC;
--- a/src/xrpld/rpc/detail/ServerHandler.cpp
+++ b/src/xrpld/rpc/detail/ServerHandler.cpp
@@ -7,6 +7,7 @@
 #include <xrpld/rpc/detail/Tuning.h>
 #include <xrpld/rpc/detail/WSInfoSub.h>
 #include <xrpld/rpc/json_body.h>
+#include <xrpld/telemetry/TracingInstrumentation.h>

 #include <xrpl/basics/Log.h>
 #include <xrpl/basics/base64.h>
@@ -267,6 +268,8 @@ buffers_to_string(ConstBufferSequence const& bs)
 void
 ServerHandler::onRequest(Session& session)
 {
+    XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");
+
    // Make sure RPC is enabled on the port
    if (session.port().protocol.count("http") == 0 && session.port().protocol.count("https") == 0)
    {
@@ -382,6 +385,7 @@ ServerHandler::processSession(
    std::shared_ptr<JobQueue::Coro> const& coro,
    Json::Value const& jv)
 {
+    XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.ws_message");
    auto is = std::static_pointer_cast<WSInfoSub>(session->appDefined);
    if (is->getConsumer().disconnect(m_journal))
    {
@@ -574,6 +578,7 @@ ServerHandler::processRequest(
    std::string_view forwardedFor,
    std::string_view user)
 {
+    XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.process");
    auto rpcJ = app_.journal("RPC");

    Json::Value jsonOrig;
--- a/src/xrpld/telemetry/TracingInstrumentation.h
+++ b/src/xrpld/telemetry/TracingInstrumentation.h
@@ -0,0 +1,162 @@
+#pragma once
+
+/** Convenience macros for instrumenting code with OpenTelemetry trace spans.
+
+    When XRPL_ENABLE_TELEMETRY is defined, the macros create SpanGuard objects
+    that manage span lifetime via RAII. When it is not defined, every macro
+    expands to ((void)0): the arguments are discarded by the preprocessor and
+    never evaluated, so there is zero overhead.
+
+    Usage in instrumented code:
+    @code
+        XRPL_TRACE_RPC(app.getTelemetry(), "rpc.command." + name);
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name);
+        XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
+    @endcode
+
+    @note Macro parameter names use leading/trailing underscores
+    (e.g. _tel_obj_) to avoid colliding with identifiers in the macro body,
+    specifically the ::xrpl::telemetry:: namespace qualifier.
+*/
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include <xrpl/telemetry/SpanGuard.h>
+#include <xrpl/telemetry/Telemetry.h>
+
+#include <optional>
+
+namespace xrpl {
+namespace telemetry {
+
+/** Start an unconditional span, ended when the guard goes out of scope.
+
+    The guard is declared as an engaged std::optional<SpanGuard> named
+    `_xrpl_guard_`, matching the type declared by the conditional span
+    macros (XRPL_TRACE_RPC and friends). XRPL_TRACE_SET_ATTR,
+    XRPL_TRACE_EXCEPTION and XRPL_TRACE_ADD_EVENT call has_value() and
+    operator-> on `_xrpl_guard_`, so they can be used after this macro too.
+
+    Declares `_xrpl_span_` and `_xrpl_guard_` in the enclosing scope; use at
+    most one span macro per scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string.
+*/
+#define XRPL_TRACE_SPAN(_tel_obj_, _span_name_)               \
+    auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_);    \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_( \
+        std::in_place, _xrpl_span_)
+
+/** Start an unconditional span with a specific SpanKind.
+
+    The guard is declared as an engaged std::optional<SpanGuard> named
+    `_xrpl_guard_`, matching the type declared by the conditional span
+    macros (XRPL_TRACE_RPC and friends). XRPL_TRACE_SET_ATTR,
+    XRPL_TRACE_EXCEPTION and XRPL_TRACE_ADD_EVENT call has_value() and
+    operator-> on `_xrpl_guard_`, so they can be used after this macro too.
+
+    Declares `_xrpl_span_` and `_xrpl_guard_` in the enclosing scope; use at
+    most one span macro per scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string.
+    @param _span_kind_  opentelemetry::trace::SpanKind value.
+*/
+#define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_)       \
+    auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_, _span_kind_); \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_(           \
+        std::in_place, _xrpl_span_)
+
+/** Conditionally start a span for RPC tracing.
+
+    Declares an empty std::optional<SpanGuard> named `_xrpl_guard_` in the
+    enclosing scope and engages it with a new span only if shouldTraceRpc()
+    returns true. Because the name is fixed, use at most one span macro per
+    scope.
+
+    `_tel_obj_` is evaluated exactly once: it is bound to a reference that
+    is local to the `if` statement, so an argument such as a getter call is
+    not repeated and the helper name does not leak into the caller's scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string; evaluated only when the span is
+                        actually created.
+*/
+#define XRPL_TRACE_RPC(_tel_obj_, _span_name_)                        \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_;         \
+    if (auto&& _xrpl_tel_ = (_tel_obj_); _xrpl_tel_.shouldTraceRpc()) \
+    {                                                                 \
+        _xrpl_guard_.emplace(_xrpl_tel_.startSpan(_span_name_));      \
+    }
+
+/** Conditionally start a span for transaction tracing.
+
+    Declares an empty std::optional<SpanGuard> named `_xrpl_guard_` in the
+    enclosing scope and engages it with a new span only if
+    shouldTraceTransactions() returns true. Because the name is fixed, use
+    at most one span macro per scope.
+
+    `_tel_obj_` is evaluated exactly once: it is bound to a reference that
+    is local to the `if` statement, so an argument such as a getter call is
+    not repeated and the helper name does not leak into the caller's scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string; evaluated only when the span is
+                        actually created.
+*/
+#define XRPL_TRACE_TX(_tel_obj_, _span_name_)                                  \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_;                  \
+    if (auto&& _xrpl_tel_ = (_tel_obj_); _xrpl_tel_.shouldTraceTransactions()) \
+    {                                                                          \
+        _xrpl_guard_.emplace(_xrpl_tel_.startSpan(_span_name_));               \
+    }
+
+/** Conditionally start a span for consensus tracing.
+
+    Declares an empty std::optional<SpanGuard> named `_xrpl_guard_` in the
+    enclosing scope and engages it with a new span only if
+    shouldTraceConsensus() returns true. Because the name is fixed, use at
+    most one span macro per scope.
+
+    `_tel_obj_` is evaluated exactly once: it is bound to a reference that
+    is local to the `if` statement, so an argument such as a getter call is
+    not repeated and the helper name does not leak into the caller's scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string; evaluated only when the span is
+                        actually created.
+*/
+#define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_)                        \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_;               \
+    if (auto&& _xrpl_tel_ = (_tel_obj_); _xrpl_tel_.shouldTraceConsensus()) \
+    {                                                                       \
+        _xrpl_guard_.emplace(_xrpl_tel_.startSpan(_span_name_));            \
+    }
+
+/** Conditionally start a span for peer message tracing.
+
+    Declares an empty std::optional<SpanGuard> named `_xrpl_guard_` in the
+    enclosing scope and engages it with a new span only if shouldTracePeer()
+    returns true. Because the name is fixed, use at most one span macro per
+    scope.
+
+    `_tel_obj_` is evaluated exactly once: it is bound to a reference that
+    is local to the `if` statement, so an argument such as a getter call is
+    not repeated and the helper name does not leak into the caller's scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string; evaluated only when the span is
+                        actually created.
+*/
+#define XRPL_TRACE_PEER(_tel_obj_, _span_name_)                        \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_;          \
+    if (auto&& _xrpl_tel_ = (_tel_obj_); _xrpl_tel_.shouldTracePeer()) \
+    {                                                                  \
+        _xrpl_guard_.emplace(_xrpl_tel_.startSpan(_span_name_));       \
+    }
+
+/** Conditionally start a span for ledger tracing.
+
+    Declares an empty std::optional<SpanGuard> named `_xrpl_guard_` in the
+    enclosing scope and engages it with a new span only if
+    shouldTraceLedger() returns true. Because the name is fixed, use at most
+    one span macro per scope.
+
+    `_tel_obj_` is evaluated exactly once: it is bound to a reference that
+    is local to the `if` statement, so an argument such as a getter call is
+    not repeated and the helper name does not leak into the caller's scope.
+
+    @param _tel_obj_    Telemetry instance reference.
+    @param _span_name_  Span name string; evaluated only when the span is
+                        actually created.
+*/
+#define XRPL_TRACE_LEDGER(_tel_obj_, _span_name_)                        \
+    std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_;            \
+    if (auto&& _xrpl_tel_ = (_tel_obj_); _xrpl_tel_.shouldTraceLedger()) \
+    {                                                                    \
+        _xrpl_guard_.emplace(_xrpl_tel_.startSpan(_span_name_));         \
+    }
+
+/** Set a key-value attribute on the current span (if it exists).
+
+    Requires a `_xrpl_guard_` on which has_value() and operator-> are valid
+    to be in scope, e.g. the std::optional declared by XRPL_TRACE_RPC and
+    the other conditional span macros. Does nothing when the guard is empty.
+
+    The body is wrapped in do { ... } while (0) so the expansion is a single
+    statement that needs the caller's trailing semicolon. A bare `if` would
+    make `if (c) XRPL_TRACE_SET_ATTR(k, v); else ...` ill-formed and could
+    capture a following `else`.
+
+    @param key    Attribute name forwarded to setAttribute().
+    @param value  Attribute value; evaluated only when a span is active.
+*/
+#define XRPL_TRACE_SET_ATTR(key, value)             \
+    do                                              \
+    {                                               \
+        if (_xrpl_guard_.has_value())               \
+        {                                           \
+            _xrpl_guard_->setAttribute(key, value); \
+        }                                           \
+    } while (0)
+
+/** Record an exception on the current span (if it exists).
+
+    Forwards `e` to recordException() on the active guard; per the original
+    contract this also marks the span as error. Requires a `_xrpl_guard_`
+    on which has_value() and operator-> are valid to be in scope, e.g. the
+    std::optional declared by XRPL_TRACE_RPC and the other conditional span
+    macros. Does nothing when the guard is empty.
+
+    The body is wrapped in do { ... } while (0) so the expansion is a single
+    statement that needs the caller's trailing semicolon. A bare `if` would
+    make `if (c) XRPL_TRACE_EXCEPTION(e); else ...` ill-formed and could
+    capture a following `else`.
+
+    @param e  Exception object forwarded to recordException().
+*/
+#define XRPL_TRACE_EXCEPTION(e)               \
+    do                                        \
+    {                                         \
+        if (_xrpl_guard_.has_value())         \
+        {                                     \
+            _xrpl_guard_->recordException(e); \
+        }                                     \
+    } while (0)
+
+/** Attach a named event, with attributes, to the active trace span.
+
+    Operates on the `_xrpl_guard_` local declared by the XRPL_TRACE_* span
+    macros and is a no-op when that guard holds no span. The macro is
+    variadic so that a braced attribute list containing commas can be passed
+    as a single logical argument.
+
+    Example:
+    @code
+        XRPL_TRACE_ADD_EVENT("dispute.resolve", {
+            {"xrpl.tx.id", std::string(tx_id)},
+            {"xrpl.dispute.our_vote", our_vote}
+        });
+    @endcode
+
+    @param name  Event name forwarded to addEvent().
+    @param ...   Remaining addEvent() arguments (the attribute list).
+*/
+#define XRPL_TRACE_ADD_EVENT(name, ...)            \
+    do                                             \
+    {                                              \
+        if (!_xrpl_guard_.has_value())             \
+        {                                          \
+            break; /* no active span */            \
+        }                                          \
+        _xrpl_guard_->addEvent(name, __VA_ARGS__); \
+    } while (0)
+
+}  // namespace telemetry
+}  // namespace xrpl
+
+#else  // XRPL_ENABLE_TELEMETRY not defined
+
+// No-op fallbacks used when XRPL_ENABLE_TELEMETRY is not defined.
+//
+// Each macro keeps the same parameter list as its telemetry-enabled
+// counterpart but expands to the void expression ((void)0). The arguments
+// are dropped by the preprocessor, so none of them is evaluated and no
+// `_xrpl_guard_` variable is declared. Call sites still need their trailing
+// semicolon.
+//
+// NOTE(review): because the arguments vanish, a local variable that is
+// referenced only inside one of these macro calls will look unused in
+// non-telemetry builds - confirm this does not trip unused-variable
+// warnings at the call sites.
+
+// Span-creating macros.
+#define XRPL_TRACE_SPAN(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_) ((void)0)
+#define XRPL_TRACE_RPC(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_TX(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_PEER(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_LEDGER(_tel_obj_, _span_name_) ((void)0)
+// Macros that act on the current span.
+#define XRPL_TRACE_SET_ATTR(key, value) ((void)0)
+#define XRPL_TRACE_EXCEPTION(e) ((void)0)
+#define XRPL_TRACE_ADD_EVENT(name, ...) ((void)0)
+
+#endif  // XRPL_ENABLE_TELEMETRY
Author	SHA1	Message	Date
Pratik Mankawde	b4e5a60e7c	Phase 4: Consensus tracing - round lifecycle, proposals, validations, close time Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:17:59 +00:00
Pratik Mankawde	9d62d6d4cd	Phase 3: Transaction tracing - protobuf context propagation, PeerImp, NetworkOPs Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:17:37 +00:00
Pratik Mankawde	9b0ce6da08	Phase 2: RPC tracing - span macros, attributes, WebSocket, command spans Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:17:05 +00:00
Pratik Mankawde	833559c183	Phase 1b: Telemetry core infrastructure - CMake, Conan, SpanGuard, config Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:15:00 +00:00
Pratik Mankawde	26bc7e7321	Phase 1c: RPC integration - ServerHandler tracing, telemetry config wiring Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:14:41 +00:00
Pratik Mankawde	a726c62885	Phase 1b: Telemetry core infrastructure - CMake, Conan, SpanGuard, config Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:14:41 +00:00
Pratik Mankawde	6c39ad86ef	Phase 1b: Telemetry core infrastructure - CMake, Conan, SpanGuard, config Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:14:20 +00:00
Pratik Mankawde	180838d985	docs: correct OTel overhead estimates against SDK benchmarks Verified CPU, memory, and network overhead calculations against official OTel C++ SDK benchmarks (969 CI runs) and source code analysis. Key corrections: - Span creation: 200-500ns → 500-1000ns (SDK BM_SpanCreation median ~1000ns; original estimate matched API no-op, not SDK path) - Per-TX overhead: 2.4μs → 4.0μs (2.0% vs 1.2%; still within 1-3%) - Active span memory: ~200 bytes → ~500-800 bytes (Span wrapper + SpanData + std::map attribute storage) - Static memory: ~456KB → ~8.3MB (BatchSpanProcessor worker thread stack ~8MB was omitted) - Total memory ceiling: ~2.3MB → ~10MB - Memory success metric target: <5MB → <10MB - AddEvent: 50-80ns → 100-200ns Added Section 3.5.4 with links to all benchmark sources. Updated presentation.md with matching corrections. High-level conclusions unchanged (1-3% CPU, negligible consensus). Also includes: review fixes, cross-document consistency improvements, additional component tracing docs (PathFinding, TxQ, Validator, etc.), context size corrections (32 → 25 bytes). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-24 19:11:12 +00:00
Pratik Mankawde	9ff66f03a6	Merge branch 'develop' into pratik/otel-phase1a-plan-docs	2026-03-24 16:33:15 +00:00
Pratik Mankawde	30d1c286c9	Merge remote-tracking branch 'origin/develop' into pratik/otel-phase1a-plan-docs	2026-03-24 16:26:14 +00:00
Pratik Mankawde	402933af78	moved presentation.md file Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>	2026-03-24 16:26:03 +00:00
Pratik Mankawde	346927d673	Merge branch 'develop' into pratik/otel-phase1a-plan-docs	2026-03-20 16:55:10 +00:00
Pratik Mankawde	3cc13976dc	Remove effort estimates from implementation phases document Strip effort/risk columns from task tables and remove the §6.9 Effort Summary section with its pie chart and resource requirements table. Renumber §6.10 Quick Wins → §6.9. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-17 15:00:19 +00:00
Pratik Mankawde	fe6cd31762	Add Phase 4a implementation status to plan docs Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-12 22:08:13 +00:00
Pratik Mankawde	fd18cf9e01	Merge remote-tracking branch 'origin/develop' into pratik/otel-phase1a-plan-docs	2026-03-11 14:58:44 +00:00
Pratik Mankawde	d6bf13394e	Appendix: add 00-tracing-fundamentals.md and POC_taskList.md to document index Split document index into Plan Documents and Task Lists sections. These files were introduced in this branch but missing from the index. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-09 19:03:09 +00:00
Pratik Mankawde	34243e0cc2	Phase 1a: OpenTelemetry plan documentation Add comprehensive planning documentation for the OpenTelemetry distributed tracing integration: - Tracing fundamentals and concepts - Architecture analysis of rippled's tracing surface area - Design decisions and trade-offs - Implementation strategy and code samples - Configuration reference - Implementation phases roadmap - Observability backend comparison - POC task list and presentation materials Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-09 19:03:09 +00:00