diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt
index cb35a7e6cc..4187e99657 100644
--- a/.github/scripts/levelization/results/ordering.txt
+++ b/.github/scripts/levelization/results/ordering.txt
@@ -268,6 +268,7 @@ xrpld.perflog > xrpl.json
xrpld.rpc > xrpl.basics
xrpld.rpc > xrpl.core
xrpld.rpc > xrpld.core
+xrpld.rpc > xrpld.telemetry
xrpld.rpc > xrpl.json
xrpld.rpc > xrpl.ledger
xrpld.rpc > xrpl.net
@@ -278,3 +279,4 @@ xrpld.rpc > xrpl.resource
xrpld.rpc > xrpl.server
xrpld.rpc > xrpl.tx
xrpld.shamap > xrpl.shamap
+xrpld.telemetry > xrpl.telemetry
diff --git a/presentation.md b/presentation.md
new file mode 100644
index 0000000000..7a443a635c
--- /dev/null
+++ b/presentation.md
@@ -0,0 +1,280 @@
+# OpenTelemetry Distributed Tracing for rippled
+
+---
+
+## Slide 1: Introduction
+
+### What is OpenTelemetry?
+
+OpenTelemetry is an open-source, CNCF-backed observability framework for distributed tracing, metrics, and logs.
+
+### Why OpenTelemetry for rippled?
+
+- **End-to-End Transaction Visibility**: Track transactions from submission → consensus → ledger inclusion
+- **Cross-Node Correlation**: Follow requests across multiple independent nodes using a unique `trace_id`
+- **Consensus Round Analysis**: Understand timing and behavior across validators
+- **Incident Debugging**: Correlate events across distributed nodes during issues
+
+```mermaid
+flowchart LR
+ A["Node A
tx.receive
trace_id: abc123"] --> B["Node B
tx.relay
trace_id: abc123"] --> C["Node C
tx.validate
trace_id: abc123"] --> D["Node D
ledger.apply
trace_id: abc123"]
+
+ style A fill:#1565c0,stroke:#0d47a1,color:#fff
+ style B fill:#2e7d32,stroke:#1b5e20,color:#fff
+ style C fill:#2e7d32,stroke:#1b5e20,color:#fff
+ style D fill:#e65100,stroke:#bf360c,color:#fff
+```
+
+> **Trace ID: abc123** — All nodes share the same trace, enabling cross-node correlation.
+
+---
+
+## Slide 2: OpenTelemetry vs Open Source Alternatives
+
+| Feature | OpenTelemetry | Jaeger | Zipkin | SkyWalking | Pinpoint | Prometheus |
+| ------------------- | ---------------- | ---------------- | ------------------ | ---------- | ---------- | ---------- |
+| **Tracing** | YES | YES | YES | YES | YES | NO |
+| **Metrics** | YES | NO | NO | YES | YES | YES |
+| **Logs** | YES | NO | NO | YES | NO | NO |
+| **C++ SDK** | YES Official | YES (Deprecated) | YES (Unmaintained) | NO | NO | YES |
+| **Vendor Neutral** | YES Primary goal | NO | NO | NO | NO | NO |
+| **Instrumentation** | Manual + Auto | Manual | Manual | Auto-first | Auto-first | Manual |
+| **Backend** | Any (exporters) | Self | Self | Self | Self | Self |
+| **CNCF Status** | Incubating | Graduated | NO | Incubating | NO | Graduated |
+
+> **Why OpenTelemetry?** It's the only actively maintained, full-featured C++ option with vendor neutrality — allowing export to Jaeger, Prometheus, Grafana, or any commercial backend without changing instrumentation.
+
+---
+
+## Slide 3: Comparison with rippled's Existing Solutions
+
+### Current Observability Stack
+
+| Aspect | PerfLog (JSON) | StatsD (Metrics) | OpenTelemetry (NEW) |
+| --------------------- | --------------------- | --------------------- | --------------------------- |
+| **Type** | Logging | Metrics | Distributed Tracing |
+| **Scope** | Single node | Single node | **Cross-node** |
+| **Data** | JSON log entries | Counters, gauges | Spans with context |
+| **Correlation** | By timestamp | By metric name | By `trace_id` |
+| **Overhead** | Low (file I/O) | Low (UDP) | Low-Medium (configurable) |
+| **Question Answered** | "What happened here?" | "How many? How fast?" | **"What was the journey?"** |
+
+### Use Case Matrix
+
+| Scenario | PerfLog | StatsD | OpenTelemetry |
+| -------------------------------- | ------- | ------ | ------------- |
+| "How many TXs per second?" | ❌ | ✅ | ❌ |
+| "Why was this specific TX slow?" | ⚠️ | ❌ | ✅ |
+| "Which node delayed consensus?" | ❌ | ❌ | ✅ |
+| "Show TX journey across 5 nodes" | ❌ | ❌ | ✅ |
+
+> **Key Insight**: OpenTelemetry **complements** (not replaces) existing systems.
+
+---
+
+## Slide 4: Architecture
+
+### High-Level Integration Architecture
+
+```mermaid
+flowchart TB
+ subgraph rippled["rippled Node"]
+ subgraph services["Core Services"]
+ direction LR
+ RPC["RPC Server
(HTTP/WS)"] ~~~ Overlay["Overlay
(P2P Network)"] ~~~ Consensus["Consensus
(RCLConsensus)"]
+ end
+
+ Telemetry["Telemetry Module
(OpenTelemetry SDK)"]
+
+ services --> Telemetry
+ end
+
+ Telemetry -->|OTLP/gRPC| Collector["OTel Collector"]
+
+ Collector --> Tempo["Grafana Tempo"]
+ Collector --> Jaeger["Jaeger"]
+ Collector --> Elastic["Elastic APM"]
+
+ style rippled fill:#424242,stroke:#212121,color:#fff
+ style services fill:#1565c0,stroke:#0d47a1,color:#fff
+ style Telemetry fill:#2e7d32,stroke:#1b5e20,color:#fff
+ style Collector fill:#e65100,stroke:#bf360c,color:#fff
+```
+
+### Context Propagation
+
+```mermaid
+sequenceDiagram
+ participant Client
+ participant NodeA as Node A
+ participant NodeB as Node B
+
+ Client->>NodeA: Submit TX (no context)
+ Note over NodeA: Creates trace_id: abc123
span: tx.receive
+ NodeA->>NodeB: Relay TX
(traceparent: abc123)
+ Note over NodeB: Links to trace_id: abc123
span: tx.relay
+```
+
+- **HTTP/RPC**: W3C Trace Context headers (`traceparent`)
+- **P2P Messages**: Protocol Buffer extension fields
+
+---
+
+## Slide 5: Implementation Plan
+
+### 5-Phase Rollout (9 Weeks)
+
+```mermaid
+gantt
+ title Implementation Timeline
+ dateFormat YYYY-MM-DD
+ axisFormat Week %W
+
+ section Phase 1
+ Core Infrastructure :p1, 2024-01-01, 2w
+
+ section Phase 2
+ RPC Tracing :p2, after p1, 2w
+
+ section Phase 3
+ Transaction Tracing :p3, after p2, 2w
+
+ section Phase 4
+ Consensus Tracing :p4, after p3, 2w
+
+ section Phase 5
+ Documentation :p5, after p4, 1w
+```
+
+### Phase Details
+
+| Phase | Focus | Key Deliverables | Effort |
+| ----- | ------------------- | -------------------------------------------- | ------- |
+| 1 | Core Infrastructure | SDK integration, Telemetry interface, Config | 10 days |
+| 2 | RPC Tracing | HTTP context extraction, Handler spans | 10 days |
+| 3 | Transaction Tracing | Protobuf context, P2P relay propagation | 10 days |
+| 4 | Consensus Tracing | Round spans, Proposal/validation tracing | 10 days |
+| 5 | Documentation | Runbook, Dashboards, Training | 7 days |
+
+**Total Effort**: ~47 developer-days (2 developers)
+
+---
+
+## Slide 6: Performance Overhead
+
+### Estimated System Impact
+
+| Metric | Overhead | Notes |
+| ----------------- | ---------- | ----------------------------------- |
+| **CPU** | 1-3% | Span creation and attribute setting |
+| **Memory** | 2-5 MB | Batch buffer for pending spans |
+| **Network** | 10-50 KB/s | Compressed OTLP export to collector |
+| **Latency (p99)** | <2% | With proper sampling configuration |
+
+### Per-Message Overhead (Context Propagation)
+
+Each P2P message carries trace context with the following overhead:
+
+| Field | Size | Description |
+| ------------- | ------------- | ----------------------------------------- |
+| `trace_id` | 16 bytes | Unique identifier for the entire trace |
+| `span_id` | 8 bytes | Current span (becomes parent on receiver) |
+| `trace_flags` | 4 bytes | Sampling decision flags |
+| `trace_state` | 0-4 bytes | Optional vendor-specific data |
+| **Total** | **~32 bytes** | **Added per traced P2P message** |
+
+```mermaid
+flowchart LR
+ subgraph msg["P2P Message with Trace Context"]
+ A["Original Message
(variable size)"] --> B["+ TraceContext
(~32 bytes)"]
+ end
+
+ subgraph breakdown["Context Breakdown"]
+ C["trace_id
16 bytes"]
+ D["span_id
8 bytes"]
+ E["flags
4 bytes"]
+ F["state
0-4 bytes"]
+ end
+
+ B --> breakdown
+
+ style A fill:#424242,stroke:#212121,color:#fff
+ style B fill:#2e7d32,stroke:#1b5e20,color:#fff
+ style C fill:#1565c0,stroke:#0d47a1,color:#fff
+ style D fill:#1565c0,stroke:#0d47a1,color:#fff
+ style E fill:#e65100,stroke:#bf360c,color:#fff
+ style F fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+> **Note**: 32 bytes is negligible compared to typical transaction messages (hundreds to thousands of bytes)
+
+### Mitigation Strategies
+
+```mermaid
+flowchart LR
+ A["Head Sampling
10% default"] --> B["Tail Sampling
Keep errors/slow"] --> C["Batch Export
Reduce I/O"] --> D["Conditional Compile
XRPL_ENABLE_TELEMETRY"]
+
+ style A fill:#1565c0,stroke:#0d47a1,color:#fff
+ style B fill:#2e7d32,stroke:#1b5e20,color:#fff
+ style C fill:#e65100,stroke:#bf360c,color:#fff
+ style D fill:#4a148c,stroke:#2e0d57,color:#fff
+```
+
+### Kill Switches (Rollback Options)
+
+1. **Config Disable**: Set `enabled=0` in config → instant disable, no restart needed for sampling
+2. **Rebuild**: Compile with `XRPL_ENABLE_TELEMETRY=OFF` → zero overhead (no-op)
+3. **Full Revert**: Clean separation allows easy commit reversion
+
+---
+
+## Slide 7: Data Collection & Privacy
+
+### What Data is Collected
+
+| Category | Attributes Collected | Purpose |
+| --------------- | ---------------------------------------------------------------------------------- | --------------------------- |
+| **Transaction** | `tx.hash`, `tx.type`, `tx.result`, `tx.fee`, `ledger_index` | Trace transaction lifecycle |
+| **Consensus** | `round`, `phase`, `mode`, `proposers`(public key or public node id), `duration_ms` | Analyze consensus timing |
+| **RPC** | `command`, `version`, `status`, `duration_ms` | Monitor RPC performance |
+| **Peer** | `peer.id`(public key), `latency_ms`, `message.type`, `message.size` | Network topology analysis |
+| **Ledger** | `ledger.hash`, `ledger.index`, `close_time`, `tx_count` | Ledger progression tracking |
+| **Job** | `job.type`, `queue_ms`, `worker` | JobQueue performance |
+
+### What is NOT Collected (Privacy Guarantees)
+
+```mermaid
+flowchart LR
+ subgraph notCollected["❌ NOT Collected"]
+ direction LR
+ A["Private Keys"] ~~~ B["Account Balances"] ~~~ C["Transaction Amounts"]
+ end
+
+ subgraph alsoNot["❌ Also Excluded"]
+ direction LR
+ D["IP Addresses
(configurable)"] ~~~ E["Personal Data"] ~~~ F["Raw TX Payloads"]
+ end
+
+ style A fill:#c62828,stroke:#8c2809,color:#fff
+ style B fill:#c62828,stroke:#8c2809,color:#fff
+ style C fill:#c62828,stroke:#8c2809,color:#fff
+ style D fill:#c62828,stroke:#8c2809,color:#fff
+ style E fill:#c62828,stroke:#8c2809,color:#fff
+ style F fill:#c62828,stroke:#8c2809,color:#fff
+```
+
+### Privacy Protection Mechanisms
+
+| Mechanism | Description |
+| -------------------------- | ------------------------------------------------------------- |
+| **Account Hashing** | `xrpl.tx.account` is hashed at collector level before storage |
+| **Configurable Redaction** | Sensitive fields can be excluded via config |
+| **Sampling** | Only 10% of traces recorded by default (reduces exposure) |
+| **Local Control** | Node operators control what gets exported |
+| **No Raw Payloads** | Transaction content is never recorded, only metadata |
+
+> **Key Principle**: Telemetry collects **operational metadata** (timing, counts, hashes) — never **sensitive content** (keys, balances, amounts).
+
+---
+
+_End of Presentation_
diff --git a/src/xrpld/rpc/detail/RPCHandler.cpp b/src/xrpld/rpc/detail/RPCHandler.cpp
index 1d8e1168b4..b19a2ba184 100644
--- a/src/xrpld/rpc/detail/RPCHandler.cpp
+++ b/src/xrpld/rpc/detail/RPCHandler.cpp
@@ -8,6 +8,7 @@
#include
#include
#include
+#include
#include
#include
@@ -157,6 +158,11 @@ template
Status
callMethod(JsonContext& context, Method method, std::string const& name, Object& result)
{
+ XRPL_TRACE_RPC(context.app.getTelemetry(), "rpc.command." + name);
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name.c_str());
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.version", static_cast(context.apiVersion));
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.role", (context.role == Role::ADMIN ? "admin" : "user"));
+
static std::atomic requestId{0};
auto& perfLog = context.app.getPerfLog();
std::uint64_t const curId = ++requestId;
@@ -172,12 +178,15 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
JLOG(context.j.debug()) << "RPC call " << name << " completed in "
<< ((end - start).count() / 1000000000.0) << "seconds";
perfLog.rpcFinish(name, curId);
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
return ret;
}
catch (std::exception& e)
{
perfLog.rpcError(name, curId);
JLOG(context.j.info()) << "Caught throw: " << e.what();
+ XRPL_TRACE_EXCEPTION(e);
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error");
if (context.loadType == Resource::feeReferenceRPC)
context.loadType = Resource::feeExceptionRPC;
diff --git a/src/xrpld/rpc/detail/ServerHandler.cpp b/src/xrpld/rpc/detail/ServerHandler.cpp
index e5cc7a83bf..c3460333f1 100644
--- a/src/xrpld/rpc/detail/ServerHandler.cpp
+++ b/src/xrpld/rpc/detail/ServerHandler.cpp
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
#include
@@ -266,6 +267,8 @@ buffers_to_string(ConstBufferSequence const& bs)
void
ServerHandler::onRequest(Session& session)
{
+ XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");
+
// Make sure RPC is enabled on the port
if (session.port().protocol.count("http") == 0 && session.port().protocol.count("https") == 0)
{
@@ -381,6 +384,7 @@ ServerHandler::processSession(
std::shared_ptr const& coro,
Json::Value const& jv)
{
+ XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.ws_message");
auto is = std::static_pointer_cast(session->appDefined);
if (is->getConsumer().disconnect(m_journal))
{
@@ -573,6 +577,7 @@ ServerHandler::processRequest(
std::string_view forwardedFor,
std::string_view user)
{
+ XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.process");
auto rpcJ = app_.getJournal("RPC");
Json::Value jsonOrig;
diff --git a/src/xrpld/telemetry/TracingInstrumentation.h b/src/xrpld/telemetry/TracingInstrumentation.h
new file mode 100644
index 0000000000..d7d2ecf912
--- /dev/null
+++ b/src/xrpld/telemetry/TracingInstrumentation.h
@@ -0,0 +1,115 @@
+#pragma once
+
+/** Convenience macros for instrumenting code with OpenTelemetry trace spans.
+
+ When XRPL_ENABLE_TELEMETRY is defined, the macros create SpanGuard objects
+ that manage span lifetime via RAII. When not defined, all macros expand to
+ ((void)0) with zero overhead.
+
+ Usage in instrumented code:
+ @code
+ XRPL_TRACE_RPC(app.getTelemetry(), "rpc.command." + name);
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name);
+ XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
+ @endcode
+
+ @note Macro parameter names use leading/trailing underscores
+ (e.g. _tel_obj_) to avoid colliding with identifiers in the macro body,
+ specifically the ::xrpl::telemetry:: namespace qualifier.
+*/
+
+#ifdef XRPL_ENABLE_TELEMETRY
+
+#include
+#include
+
+#include
+
+namespace xrpl {
+namespace telemetry {
+
+/** Start an unconditional span, ended when the guard goes out of scope.
+ @param _tel_obj_ Telemetry instance reference.
+ @param _span_name_ Span name string.
+*/
+#define XRPL_TRACE_SPAN(_tel_obj_, _span_name_) \
+ auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_); \
+ ::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
+
+/** Start an unconditional span with a specific SpanKind.
+ @param _tel_obj_ Telemetry instance reference.
+ @param _span_name_ Span name string.
+ @param _span_kind_ opentelemetry::trace::SpanKind value.
+*/
+#define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_) \
+ auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_, _span_kind_); \
+ ::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
+
+/** Conditionally start a span for RPC tracing.
+ The span is only created if shouldTraceRpc() returns true.
+ @param _tel_obj_ Telemetry instance reference.
+ @param _span_name_ Span name string.
+*/
+#define XRPL_TRACE_RPC(_tel_obj_, _span_name_) \
+ std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
+ if ((_tel_obj_).shouldTraceRpc()) \
+ { \
+ _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \
+ }
+
+/** Conditionally start a span for transaction tracing.
+ The span is only created if shouldTraceTransactions() returns true.
+ @param _tel_obj_ Telemetry instance reference.
+ @param _span_name_ Span name string.
+*/
+#define XRPL_TRACE_TX(_tel_obj_, _span_name_) \
+ std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
+ if ((_tel_obj_).shouldTraceTransactions()) \
+ { \
+ _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \
+ }
+
+/** Conditionally start a span for consensus tracing.
+ The span is only created if shouldTraceConsensus() returns true.
+ @param _tel_obj_ Telemetry instance reference.
+ @param _span_name_ Span name string.
+*/
+#define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) \
+ std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
+ if ((_tel_obj_).shouldTraceConsensus()) \
+ { \
+ _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \
+ }
+
+/** Set a key-value attribute on the current span (if it exists).
+ Must be used after one of the XRPL_TRACE_* span macros.
+*/
+#define XRPL_TRACE_SET_ATTR(key, value) \
+ if (_xrpl_guard_.has_value()) \
+ { \
+ _xrpl_guard_->setAttribute(key, value); \
+ }
+
+/** Record an exception on the current span and mark it as error.
+ Must be used after one of the XRPL_TRACE_* span macros.
+*/
+#define XRPL_TRACE_EXCEPTION(e) \
+ if (_xrpl_guard_.has_value()) \
+ { \
+ _xrpl_guard_->recordException(e); \
+ }
+
+} // namespace telemetry
+} // namespace xrpl
+
+#else // XRPL_ENABLE_TELEMETRY not defined
+
+#define XRPL_TRACE_SPAN(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_) ((void)0)
+#define XRPL_TRACE_RPC(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_TX(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) ((void)0)
+#define XRPL_TRACE_SET_ATTR(key, value) ((void)0)
+#define XRPL_TRACE_EXCEPTION(e) ((void)0)
+
+#endif // XRPL_ENABLE_TELEMETRY