mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-04 01:06:48 +00:00
fix(telemetry): address Phase 1b code review findings
Redesign SpanGuard with pimpl idiom to hide all OpenTelemetry types from public headers. Add global Telemetry accessor so SpanGuard factory methods work without explicit Telemetry references. Add child/linked span creation and cross-thread context propagation. Update plan docs to reflect macro removal in favor of SpanGuard factory pattern. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,10 +12,10 @@ The telemetry implementation follows rippled's existing code organization patter
|
||||
```
|
||||
include/xrpl/
|
||||
├── telemetry/
|
||||
│ ├── Telemetry.h # Main telemetry interface
|
||||
│ ├── Telemetry.h # Main telemetry interface (global singleton)
|
||||
│ ├── TelemetryConfig.h # Configuration structures
|
||||
│ ├── TraceContext.h # Context propagation utilities
|
||||
│ ├── SpanGuard.h # RAII span management with discard()
|
||||
│ ├── SpanGuard.h # RAII span management with factory methods + discard()
|
||||
│ ├── DiscardFlag.h # Thread-local discard flag
|
||||
│ └── SpanAttributes.h # Attribute helper functions
|
||||
|
||||
@@ -25,11 +25,6 @@ src/libxrpl/
|
||||
│ ├── TelemetryConfig.cpp # Config parsing
|
||||
│ ├── TraceContext.cpp # Context serialization
|
||||
│ └── NullTelemetry.cpp # No-op implementation
|
||||
|
||||
src/xrpld/
|
||||
├── telemetry/
|
||||
│ ├── TracingInstrumentation.h # Instrumentation macros
|
||||
│ └── TracingInstrumentation.cpp
|
||||
```
|
||||
|
||||
---
|
||||
@@ -315,20 +310,20 @@ flowchart TD
|
||||
|
||||
### 3.7.3 Conditional Instrumentation
|
||||
|
||||
```cpp
|
||||
// Compile-time feature flag
|
||||
#ifndef XRPL_ENABLE_TELEMETRY
|
||||
// Zero-cost when disabled
|
||||
#define XRPL_TRACE_SPAN(t, n) ((void)0)
|
||||
#endif
|
||||
SpanGuard's static factory methods handle both compile-time and runtime
|
||||
checks internally. When `XRPL_ENABLE_TELEMETRY` is not defined, the
|
||||
entire SpanGuard class compiles to a no-op stub with empty method bodies.
|
||||
When it is defined, the factory methods check the global Telemetry
|
||||
instance and the relevant component filter before creating a span:
|
||||
|
||||
// Runtime component filtering
|
||||
if (telemetry.shouldTracePeer())
|
||||
{
|
||||
XRPL_TRACE_SPAN(telemetry, "peer.message.receive");
|
||||
// ... instrumentation
|
||||
}
|
||||
// No overhead when component tracing disabled
|
||||
```cpp
|
||||
// SpanGuard factory methods handle all conditional logic internally.
|
||||
// When XRPL_ENABLE_TELEMETRY is not defined, these are no-ops.
|
||||
// When defined, they check Telemetry::getInstance() and the
|
||||
// component filter (e.g. shouldTracePeer()) at runtime.
|
||||
auto span = telemetry::SpanGuard::peerSpan("peer.message.receive");
|
||||
span.setAttribute("xrpl.peer.id", peerId);
|
||||
// No-op when telemetry is compiled out; at runtime only the enable/filter check is paid
|
||||
```
|
||||
|
||||
---
|
||||
@@ -351,7 +346,7 @@ This section provides a detailed assessment of how intrusive the OpenTelemetry i
|
||||
|
||||
| Component | Files Modified | Lines Added | Lines Changed | Architectural Impact |
|
||||
| --------------------- | -------------- | ----------- | ------------- | -------------------- |
|
||||
| **Core Telemetry** | 5 new files | ~800 | 0 | None (new module) |
|
||||
| **Core Telemetry** | 7 new files | ~800 | 0 | None (new module) |
|
||||
| **Application Init** | 2 files | ~30 | ~5 | Minimal |
|
||||
| **RPC Layer** | 3 files | ~80 | ~20 | Minimal |
|
||||
| **Transaction Relay** | 4 files | ~120 | ~40 | Low |
|
||||
@@ -361,7 +356,7 @@ This section provides a detailed assessment of how intrusive the OpenTelemetry i
|
||||
| **PathFinding** | 2 | ~80 | ~5 | Minimal |
|
||||
| **TxQ/Fee** | 2 | ~60 | ~5 | Minimal |
|
||||
| **Validator/Amend** | 3 | ~40 | ~5 | Minimal |
|
||||
| **Total** | **~28 files** | **~1,490** | **~120** | **Low** |
|
||||
| **Total** | **~27 files** | **~1,490** | **~120** | **Low** |
|
||||
|
||||
### 3.9.2 Detailed File Impact
|
||||
|
||||
@@ -381,16 +376,15 @@ pie title Code Changes by Component
|
||||
|
||||
#### New Files (No Impact on Existing Code)
|
||||
|
||||
| File | Lines | Purpose |
|
||||
| ---------------------------------------------- | ----- | --------------------------------------- |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | ~160 | Main interface |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | ~120 | RAII wrapper + discard |
|
||||
| `include/xrpl/telemetry/DiscardFlag.h` | ~28 | Thread-local discard flag |
|
||||
| `include/xrpl/telemetry/TraceContext.h` | ~80 | Context propagation |
|
||||
| `src/xrpld/telemetry/TracingInstrumentation.h` | ~60 | Macros |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | ~400 | Implementation + FilteringSpanProcessor |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | ~60 | Config parsing |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | ~40 | No-op implementation |
|
||||
| File | Lines | Purpose |
|
||||
| ------------------------------------------- | ----- | ----------------------------------------------------- |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | ~160 | Main interface (global singleton) |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | ~250 | RAII wrapper + factory methods + discard + no-op stub |
|
||||
| `include/xrpl/telemetry/DiscardFlag.h` | ~28 | Thread-local discard flag |
|
||||
| `include/xrpl/telemetry/TraceContext.h` | ~80 | Context propagation |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | ~400 | Implementation + FilteringSpanProcessor |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | ~60 | Config parsing |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | ~40 | No-op implementation |
|
||||
|
||||
#### Modified Files (Existing Rippled Code)
|
||||
|
||||
@@ -493,18 +487,24 @@ void ServerHandler::onRequest(...) {
|
||||
send(result);
|
||||
}
|
||||
|
||||
// After (only ~10 lines added)
|
||||
// After (only ~4 lines added)
|
||||
void ServerHandler::onRequest(...) {
|
||||
XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request"); // +1 line
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.command", command); // +1 line
|
||||
auto span = telemetry::SpanGuard::rpcSpan("rpc.request"); // +1 line
|
||||
span.setAttribute("xrpl.rpc.command", command); // +1 line
|
||||
|
||||
auto result = processRequest(req);
|
||||
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.status", status); // +1 line
|
||||
span.setAttribute("xrpl.rpc.status", status); // +1 line
|
||||
send(result);
|
||||
}
|
||||
```
|
||||
|
||||
SpanGuard factory methods (`rpcSpan`, `txSpan`, `consensusSpan`, etc.)
|
||||
access the global `Telemetry` instance internally and check the relevant
|
||||
component filter (`shouldTraceRpc()`, etc.) before creating a span. The
|
||||
public SpanGuard header has zero `opentelemetry/` includes -- all OTel
|
||||
types are hidden behind the pimpl idiom.
|
||||
|
||||
**Consensus Instrumentation (Medium Intrusiveness):**
|
||||
|
||||
```cpp
|
||||
@@ -515,11 +515,11 @@ void RCLConsensusAdaptor::startRound(...) {
|
||||
|
||||
// After (context storage required)
|
||||
void RCLConsensusAdaptor::startRound(...) {
|
||||
XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.round");
|
||||
XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", seq);
|
||||
auto span = telemetry::SpanGuard::consensusSpan("consensus.round");
|
||||
span.setAttribute("xrpl.consensus.ledger.seq", seq);
|
||||
|
||||
// Store context for child spans in phase transitions
|
||||
currentRoundContext_ = _xrpl_guard_->context(); // New member variable
|
||||
currentRoundContext_ = span.context(); // New member variable
|
||||
|
||||
// ... existing logic unchanged
|
||||
}
|
||||
|
||||
@@ -181,271 +181,227 @@ setup_Telemetry(
|
||||
|
||||
---
|
||||
|
||||
## 4.2 RAII Span Guard
|
||||
## 4.2 RAII Span Guard with Factory Methods
|
||||
|
||||
SpanGuard is a self-contained RAII wrapper that creates, activates, and
|
||||
ends trace spans. It uses the pimpl idiom to hide all OpenTelemetry
|
||||
types -- the public header has **zero `opentelemetry/` includes**.
|
||||
Callers never interact with OTel SDK types directly.
|
||||
|
||||
SpanGuard provides static factory methods (`rpcSpan()`, `txSpan()`,
|
||||
`consensusSpan()`, etc.) that access the global `Telemetry` singleton
|
||||
internally. Each factory checks both the runtime enable flag and the
|
||||
relevant component filter before creating a span.
|
||||
|
||||
When `XRPL_ENABLE_TELEMETRY` is **not** defined, the entire SpanGuard
|
||||
class compiles to a no-op stub with empty inline method bodies, giving
|
||||
zero runtime cost and negligible compile-time cost.
|
||||
|
||||
```cpp
|
||||
// include/xrpl/telemetry/SpanGuard.h
|
||||
//
|
||||
// Public API -- no opentelemetry/ includes.
|
||||
// OTel types are hidden behind the pimpl (Impl struct, defined in the
|
||||
// #ifdef XRPL_ENABLE_TELEMETRY section at the bottom of the header).
|
||||
#pragma once
|
||||
|
||||
#include <opentelemetry/trace/span.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/status.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
#include <exception>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/**
|
||||
* RAII guard for OpenTelemetry spans.
|
||||
*
|
||||
* Automatically ends the span on destruction and makes it the current
|
||||
* span in the thread-local context.
|
||||
*/
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
class SpanGuard
|
||||
{
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span_;
|
||||
opentelemetry::trace::Scope scope_;
|
||||
struct Impl; // pimpl -- defined in .cpp or
|
||||
std::unique_ptr<Impl> impl_; // in the guarded section below
|
||||
|
||||
public:
|
||||
/**
|
||||
* Construct guard with span.
|
||||
* The span becomes the current span in thread-local context.
|
||||
*
|
||||
* @note If span is nullptr (e.g., telemetry disabled), the guard
|
||||
* becomes a no-op. All methods safely check for null before access.
|
||||
*/
|
||||
explicit SpanGuard(
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span)
|
||||
: span_(span ? std::move(span) : nullptr)
|
||||
, scope_(span_ ? opentelemetry::trace::Scope(span_)
|
||||
: opentelemetry::trace::Scope(
|
||||
opentelemetry::nostd::shared_ptr<
|
||||
opentelemetry::trace::Span>(nullptr)))
|
||||
{
|
||||
}
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// FACTORY METHODS (access global Telemetry internally)
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
// Non-copyable, non-movable
|
||||
/** Create a span for RPC request handling.
|
||||
* Returns a no-op guard if telemetry is disabled or
|
||||
* shouldTraceRpc() is false.
|
||||
*/
|
||||
static SpanGuard rpcSpan(std::string_view name);
|
||||
|
||||
/** Create a span for transaction processing. */
|
||||
static SpanGuard txSpan(std::string_view name);
|
||||
|
||||
/** Create a span for consensus rounds. */
|
||||
static SpanGuard consensusSpan(std::string_view name);
|
||||
|
||||
/** Create a span for peer-to-peer messages. */
|
||||
static SpanGuard peerSpan(std::string_view name);
|
||||
|
||||
/** Create a span for ledger operations. */
|
||||
static SpanGuard ledgerSpan(std::string_view name);
|
||||
|
||||
/** Create an uncategorized span (always created when enabled). */
|
||||
static SpanGuard span(std::string_view name);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// INSTANCE METHODS
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
SpanGuard(); // constructs a no-op guard
|
||||
~SpanGuard();
|
||||
SpanGuard(SpanGuard&& other) noexcept;
|
||||
SpanGuard& operator=(SpanGuard&&) = delete;
|
||||
SpanGuard(SpanGuard const&) = delete;
|
||||
SpanGuard& operator=(SpanGuard const&) = delete;
|
||||
SpanGuard(SpanGuard&&) = delete;
|
||||
SpanGuard& operator=(SpanGuard&&) = delete;
|
||||
|
||||
~SpanGuard()
|
||||
{
|
||||
if (span_)
|
||||
span_->End();
|
||||
}
|
||||
/** Mark the span status as OK. */
|
||||
void setOk();
|
||||
|
||||
/** Access the underlying span */
|
||||
opentelemetry::trace::Span& span() { return *span_; }
|
||||
opentelemetry::trace::Span const& span() const { return *span_; }
|
||||
/** Set an explicit status code. */
|
||||
void setStatus(int code, std::string_view description = "");
|
||||
|
||||
/** Set span status to OK */
|
||||
void setOk()
|
||||
{
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kOk);
|
||||
}
|
||||
|
||||
/** Set span status with code and description */
|
||||
void setStatus(
|
||||
opentelemetry::trace::StatusCode code,
|
||||
std::string_view description = "")
|
||||
{
|
||||
span_->SetStatus(code, std::string(description));
|
||||
}
|
||||
|
||||
/** Set an attribute on the span */
|
||||
/** Set a key-value attribute on the span. */
|
||||
template<typename T>
|
||||
void setAttribute(std::string_view key, T&& value)
|
||||
{
|
||||
span_->SetAttribute(
|
||||
opentelemetry::nostd::string_view(key.data(), key.size()),
|
||||
std::forward<T>(value));
|
||||
}
|
||||
void setAttribute(std::string_view key, T&& value);
|
||||
|
||||
/** Add an event to the span */
|
||||
void addEvent(std::string_view name)
|
||||
{
|
||||
span_->AddEvent(std::string(name));
|
||||
}
|
||||
/** Add an event to the span timeline. */
|
||||
void addEvent(std::string_view name);
|
||||
|
||||
/** Record an exception on the span */
|
||||
void recordException(std::exception const& e)
|
||||
{
|
||||
span_->RecordException(e);
|
||||
span_->SetStatus(
|
||||
opentelemetry::trace::StatusCode::kError,
|
||||
e.what());
|
||||
}
|
||||
/** Record an exception and set error status. */
|
||||
void recordException(std::exception const& e);
|
||||
|
||||
/** Get the current trace context */
|
||||
opentelemetry::context::Context context() const
|
||||
{
|
||||
return opentelemetry::context::RuntimeContext::GetCurrent();
|
||||
}
|
||||
/** Get the current trace context (for cross-thread propagation). */
|
||||
// Returns an opaque context handle.
|
||||
auto context() const;
|
||||
|
||||
/** Discard this span -- dropped before export. */
|
||||
void discard();
|
||||
};
|
||||
|
||||
/**
|
||||
* No-op span guard for when tracing is disabled.
|
||||
* Provides the same interface but does nothing.
|
||||
*/
|
||||
class NullSpanGuard
|
||||
#else // XRPL_ENABLE_TELEMETRY not defined -- zero-cost stub
|
||||
|
||||
class SpanGuard
|
||||
{
|
||||
public:
|
||||
NullSpanGuard() = default;
|
||||
// Factory methods -- all return no-op guards
|
||||
static SpanGuard rpcSpan(std::string_view) { return {}; }
|
||||
static SpanGuard txSpan(std::string_view) { return {}; }
|
||||
static SpanGuard consensusSpan(std::string_view) { return {}; }
|
||||
static SpanGuard peerSpan(std::string_view) { return {}; }
|
||||
static SpanGuard ledgerSpan(std::string_view) { return {}; }
|
||||
static SpanGuard span(std::string_view) { return {}; }
|
||||
|
||||
// Instance methods -- all no-ops
|
||||
void setOk() {}
|
||||
void setStatus(opentelemetry::trace::StatusCode, std::string_view = "") {}
|
||||
void setStatus(int, std::string_view = "") {}
|
||||
|
||||
template<typename T>
|
||||
void setAttribute(std::string_view, T&&) {}
|
||||
|
||||
void addEvent(std::string_view) {}
|
||||
void recordException(std::exception const&) {}
|
||||
|
||||
/** Return a default empty context (matches SpanGuard interface) */
|
||||
opentelemetry::context::Context context() const
|
||||
{
|
||||
return opentelemetry::context::Context{};
|
||||
}
|
||||
void discard() {}
|
||||
};
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4.3 Instrumentation Macros
|
||||
## 4.3 SpanGuard API Reference
|
||||
|
||||
The previous macro-based approach (`TracingInstrumentation.h` with
|
||||
`XRPL_TRACE_*` macros) has been replaced by SpanGuard's static factory
|
||||
methods. This eliminates preprocessor macros from instrumentation call
|
||||
sites and provides a cleaner, type-safe API.
|
||||
|
||||
### 4.3.1 Factory Methods
|
||||
|
||||
Each factory method accesses the global `Telemetry::getInstance()`
|
||||
singleton internally and checks the corresponding component filter.
|
||||
If telemetry is disabled (compile-time or runtime) or the component
|
||||
filter is off, the factory returns a no-op guard whose methods are
|
||||
all safe to call and have no effect.
|
||||
|
||||
| Factory Method | Component Filter | Typical Span Names |
|
||||
| -------------------------------- | --------------------------- | ------------------------------------ |
|
||||
| `SpanGuard::rpcSpan(name)` | `shouldTraceRpc()` | `rpc.request`, `rpc.command.submit` |
|
||||
| `SpanGuard::txSpan(name)` | `shouldTraceTransactions()` | `tx.receive`, `tx.validate` |
|
||||
| `SpanGuard::consensusSpan(name)` | `shouldTraceConsensus()` | `consensus.round`, `consensus.phase` |
|
||||
| `SpanGuard::peerSpan(name)` | `shouldTracePeer()` | `peer.message.receive` |
|
||||
| `SpanGuard::ledgerSpan(name)` | `shouldTraceLedger()` | `ledger.close`, `ledger.accept` |
|
||||
| `SpanGuard::span(name)` | (always, if enabled) | `job.execute`, custom spans |
|
||||
|
||||
### 4.3.2 Usage Pattern
|
||||
|
||||
```cpp
|
||||
// src/xrpld/telemetry/TracingInstrumentation.h
|
||||
#pragma once
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
void ServerHandler::onRequest(...)
|
||||
{
|
||||
// Factory creates a span if RPC tracing is enabled, no-op otherwise.
|
||||
// No Telemetry& reference needed -- accessed via global singleton.
|
||||
auto span = telemetry::SpanGuard::rpcSpan("rpc.request");
|
||||
span.setAttribute("xrpl.rpc.command", command);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// INSTRUMENTATION MACROS
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
auto result = processRequest(req);
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
span.setAttribute("xrpl.rpc.status", result.status());
|
||||
span.setOk();
|
||||
// span ended automatically when it goes out of scope
|
||||
}
|
||||
```
|
||||
|
||||
// Start a span that is automatically ended when guard goes out of scope
|
||||
#define XRPL_TRACE_SPAN(telemetry, name) \
|
||||
auto _xrpl_span_ = (telemetry).startSpan(name); \
|
||||
::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
|
||||
### 4.3.3 Compile-Time Disabled Behavior
|
||||
|
||||
// Start a span with specific kind
|
||||
#define XRPL_TRACE_SPAN_KIND(telemetry, name, kind) \
|
||||
auto _xrpl_span_ = (telemetry).startSpan(name, kind); \
|
||||
::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
|
||||
When `XRPL_ENABLE_TELEMETRY` is **not** defined, SpanGuard compiles to
|
||||
a zero-cost no-op stub. All factory methods return a default-constructed
|
||||
guard, and all instance methods have empty bodies:
|
||||
|
||||
// Conditional span based on component
|
||||
#define XRPL_TRACE_TX(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceTransactions()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
```cpp
|
||||
// When XRPL_ENABLE_TELEMETRY is not defined:
|
||||
class SpanGuard
|
||||
{
|
||||
public:
|
||||
static SpanGuard rpcSpan(std::string_view) { return {}; }
|
||||
static SpanGuard txSpan(std::string_view) { return {}; }
|
||||
static SpanGuard consensusSpan(std::string_view) { return {}; }
|
||||
static SpanGuard peerSpan(std::string_view) { return {}; }
|
||||
static SpanGuard ledgerSpan(std::string_view) { return {}; }
|
||||
static SpanGuard span(std::string_view) { return {}; }
|
||||
|
||||
#define XRPL_TRACE_CONSENSUS(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceConsensus()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
void setOk() {}
|
||||
void setStatus(int, std::string_view = "") {}
|
||||
template<typename T>
|
||||
void setAttribute(std::string_view, T&&) {}
|
||||
void addEvent(std::string_view) {}
|
||||
void recordException(std::exception const&) {}
|
||||
void discard() {}
|
||||
};
|
||||
```
|
||||
|
||||
#define XRPL_TRACE_RPC(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceRpc()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
The compiler optimizes away all calls to these empty methods, producing
|
||||
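the same binary as if no instrumentation code were present, provided the argument expressions at the call site are free of side effects (arguments such as `to_string(...)` may still be evaluated before the no-op call).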
|
||||
|
||||
#define XRPL_TRACE_PEER(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTracePeer()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
### 4.3.4 Discard Support
|
||||
|
||||
#define XRPL_TRACE_LEDGER(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceLedger()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
SpanGuard supports discarding a span before it is exported. This is
|
||||
useful for filtering out uninteresting spans (e.g. failed
|
||||
preflight checks) after the span has been started:
|
||||
|
||||
#define XRPL_TRACE_PATHFIND(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTracePathfind()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_TXQ(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceTxQ()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_VALIDATOR(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceValidator()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_AMENDMENT(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceAmendment()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
|
||||
// Set attribute on current span (if exists).
|
||||
// Works with both std::optional<SpanGuard> (from conditional macros)
|
||||
// and bare SpanGuard (from XRPL_TRACE_SPAN). Uses 'if constexpr'-like
|
||||
// dispatch via a helper that checks for .has_value().
|
||||
#define XRPL_TRACE_SET_ATTR(key, value) \
|
||||
do { \
|
||||
if constexpr (requires { _xrpl_guard_.has_value(); }) { \
|
||||
if (_xrpl_guard_.has_value()) \
|
||||
_xrpl_guard_->setAttribute(key, value); \
|
||||
} else { \
|
||||
_xrpl_guard_.setAttribute(key, value); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// Record exception on current span
|
||||
#define XRPL_TRACE_EXCEPTION(e) \
|
||||
do { \
|
||||
if constexpr (requires { _xrpl_guard_.has_value(); }) { \
|
||||
if (_xrpl_guard_.has_value()) \
|
||||
_xrpl_guard_->recordException(e); \
|
||||
} else { \
|
||||
_xrpl_guard_.recordException(e); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#else // XRPL_ENABLE_TELEMETRY not defined
|
||||
|
||||
#define XRPL_TRACE_SPAN(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_SPAN_KIND(telemetry, name, kind) ((void)0)
|
||||
#define XRPL_TRACE_TX(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_CONSENSUS(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_RPC(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_PEER(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_LEDGER(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_PATHFIND(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_TXQ(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_VALIDATOR(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_AMENDMENT(telemetry, name) ((void)0)
|
||||
#define XRPL_TRACE_SET_ATTR(key, value) ((void)0)
|
||||
#define XRPL_TRACE_EXCEPTION(e) ((void)0)
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
```cpp
|
||||
auto span = telemetry::SpanGuard::txSpan("tx.process");
|
||||
auto result = preflight(tx);
|
||||
if (result != tesSUCCESS)
|
||||
{
|
||||
// Span is dropped before entering the batch export queue.
|
||||
span.discard();
|
||||
return result;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
@@ -644,7 +600,7 @@ TraceContextPropagator::inject(
|
||||
```cpp
|
||||
// src/xrpld/overlay/detail/PeerImp.cpp (modified)
|
||||
|
||||
#include <xrpl/telemetry/TracingInstrumentation.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
void
|
||||
PeerImp::handleTransaction(
|
||||
@@ -749,7 +705,7 @@ PeerImp::handleTransaction(
|
||||
```cpp
|
||||
// src/xrpld/app/consensus/RCLConsensus.cpp (modified)
|
||||
|
||||
#include <xrpl/telemetry/TracingInstrumentation.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
void
|
||||
RCLConsensusAdaptor::startRound(
|
||||
@@ -759,20 +715,18 @@ RCLConsensusAdaptor::startRound(
|
||||
hash_set<NodeID> const& peers,
|
||||
bool proposing)
|
||||
{
|
||||
XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.round");
|
||||
auto span = telemetry::SpanGuard::consensusSpan("consensus.round");
|
||||
|
||||
XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.prev", to_string(prevLedgerHash));
|
||||
XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq",
|
||||
span.setAttribute("xrpl.consensus.ledger.prev", to_string(prevLedgerHash));
|
||||
span.setAttribute("xrpl.consensus.ledger.seq",
|
||||
static_cast<int64_t>(prevLedger.seq() + 1));
|
||||
XRPL_TRACE_SET_ATTR("xrpl.consensus.proposers",
|
||||
span.setAttribute("xrpl.consensus.proposers",
|
||||
static_cast<int64_t>(peers.size()));
|
||||
XRPL_TRACE_SET_ATTR("xrpl.consensus.mode",
|
||||
span.setAttribute("xrpl.consensus.mode",
|
||||
proposing ? "proposing" : "observing");
|
||||
|
||||
// Store trace context for use in phase transitions
|
||||
currentRoundContext_ = _xrpl_guard_.has_value()
|
||||
? _xrpl_guard_->context()
|
||||
: opentelemetry::context::Context{};
|
||||
currentRoundContext_ = span.context();
|
||||
|
||||
// ... existing implementation ...
|
||||
}
|
||||
@@ -844,34 +798,22 @@ RCLConsensusAdaptor::peerProposal(
|
||||
```cpp
|
||||
// src/xrpld/rpc/detail/ServerHandler.cpp (modified)
|
||||
|
||||
#include <xrpl/telemetry/TracingInstrumentation.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
void
|
||||
ServerHandler::onRequest(
|
||||
http_request_type&& req,
|
||||
std::function<void(http_response_type&&)>&& send)
|
||||
{
|
||||
// Extract trace context from HTTP headers (W3C Trace Context)
|
||||
auto parentCtx = telemetry::TraceContextPropagator::extractFromHeaders(
|
||||
[&req](std::string_view name) -> std::optional<std::string> {
|
||||
// Beast's find() accepts a string_view for custom header lookup
|
||||
auto it = req.find(name);
|
||||
if (it != req.end())
|
||||
return std::string(it->value());
|
||||
return std::nullopt;
|
||||
});
|
||||
|
||||
// Start request span
|
||||
auto span = app_.getTelemetry().startSpan(
|
||||
"rpc.request",
|
||||
parentCtx,
|
||||
opentelemetry::trace::SpanKind::kServer);
|
||||
telemetry::SpanGuard guard(span);
|
||||
// SpanGuard::rpcSpan() accesses the global Telemetry instance
|
||||
// and checks shouldTraceRpc() internally. Returns a no-op guard
|
||||
// if tracing is disabled.
|
||||
auto span = telemetry::SpanGuard::rpcSpan("rpc.request");
|
||||
|
||||
// Add HTTP attributes
|
||||
guard.setAttribute("http.method", std::string(req.method_string()));
|
||||
guard.setAttribute("http.target", std::string(req.target()));
|
||||
guard.setAttribute("http.user_agent",
|
||||
span.setAttribute("http.method", std::string(req.method_string()));
|
||||
span.setAttribute("http.target", std::string(req.target()));
|
||||
span.setAttribute("http.user_agent",
|
||||
std::string(req[boost::beast::http::field::user_agent]));
|
||||
|
||||
auto const startTime = std::chrono::steady_clock::now();
|
||||
@@ -885,8 +827,8 @@ ServerHandler::onRequest(
|
||||
|
||||
if (!reader.parse(body, jv))
|
||||
{
|
||||
guard.setStatus(
|
||||
opentelemetry::trace::StatusCode::kError,
|
||||
span.setStatus(
|
||||
/* kError */ 2,
|
||||
"Invalid JSON");
|
||||
sendError(send, "Invalid JSON");
|
||||
return;
|
||||
@@ -899,13 +841,12 @@ ServerHandler::onRequest(
|
||||
? jv["method"].asString()
|
||||
: "unknown";
|
||||
|
||||
guard.setAttribute("xrpl.rpc.command", command);
|
||||
span.setAttribute("xrpl.rpc.command", command);
|
||||
|
||||
// Create child span for command execution
|
||||
auto cmdSpan = app_.getTelemetry().startSpan(
|
||||
"rpc.command." + command);
|
||||
{
|
||||
telemetry::SpanGuard cmdGuard(cmdSpan);
|
||||
auto cmdSpan = telemetry::SpanGuard::rpcSpan(
|
||||
"rpc.command." + command);
|
||||
|
||||
// Execute RPC command
|
||||
auto result = processRequest(jv);
|
||||
@@ -913,42 +854,42 @@ ServerHandler::onRequest(
|
||||
// Record result attributes
|
||||
if (result.isMember("status"))
|
||||
{
|
||||
cmdGuard.setAttribute("xrpl.rpc.status",
|
||||
cmdSpan.setAttribute("xrpl.rpc.status",
|
||||
result["status"].asString());
|
||||
}
|
||||
|
||||
if (result["status"].asString() == "error")
|
||||
{
|
||||
cmdGuard.setStatus(
|
||||
opentelemetry::trace::StatusCode::kError,
|
||||
cmdSpan.setStatus(
|
||||
/* kError */ 2,
|
||||
result.isMember("error_message")
|
||||
? result["error_message"].asString()
|
||||
: "RPC error");
|
||||
}
|
||||
else
|
||||
{
|
||||
cmdGuard.setOk();
|
||||
cmdSpan.setOk();
|
||||
}
|
||||
}
|
||||
|
||||
auto const duration = std::chrono::steady_clock::now() - startTime;
|
||||
guard.setAttribute("http.duration_ms",
|
||||
span.setAttribute("http.duration_ms",
|
||||
std::chrono::duration<double, std::milli>(duration).count());
|
||||
|
||||
// Inject trace context into response headers
|
||||
http_response_type resp;
|
||||
telemetry::TraceContextPropagator::injectToHeaders(
|
||||
guard.context(),
|
||||
span.context(),
|
||||
[&resp](std::string_view name, std::string_view value) {
|
||||
resp.set(std::string(name), std::string(value));
|
||||
});
|
||||
|
||||
guard.setOk();
|
||||
span.setOk();
|
||||
send(std::move(resp));
|
||||
}
|
||||
catch (std::exception const& e)
|
||||
{
|
||||
guard.recordException(e);
|
||||
span.recordException(e);
|
||||
JLOG(journal_.error()) << "RPC request failed: " << e.what();
|
||||
sendError(send, e.what());
|
||||
}
|
||||
@@ -959,92 +900,40 @@ ServerHandler::onRequest(
|
||||
|
||||
> **Architecture note**: `JobQueue` and its inner `Workers` class do not
|
||||
> hold an `Application&` or `ServiceRegistry&`. They receive a
|
||||
> `perf::PerfLog*` at construction. To instrument job execution, a
|
||||
> `telemetry::Telemetry&` must be threaded into `JobQueue`'s constructor
|
||||
> alongside the existing `PerfLog&`, or the trace context can be
|
||||
> captured/restored without starting new spans inside the worker itself.
|
||||
> `perf::PerfLog*` at construction. Because SpanGuard's factory methods
|
||||
> access the global `Telemetry` instance directly, no `Telemetry&`
|
||||
> reference needs to be threaded into `JobQueue`.
|
||||
>
|
||||
> The approach below captures trace context at job-creation time and
|
||||
> restores it when the job executes, so that any spans created _inside_
|
||||
> the job body automatically become children of the original caller's
|
||||
> trace. This requires adding a `telemetry::Telemetry&` to `JobQueue`.
|
||||
> trace.
|
||||
|
||||
```cpp
|
||||
// include/xrpl/core/JobQueue.h (modified)
|
||||
// src/libxrpl/core/detail/JobQueue.cpp (modified -- processTask)
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <opentelemetry/context/context.h>
|
||||
#endif
|
||||
|
||||
class JobQueue : private Workers::Callback
|
||||
{
|
||||
// ... existing members ...
|
||||
|
||||
// Telemetry reference for job execution spans (added alongside
|
||||
// the existing perf::PerfLog& member).
|
||||
telemetry::Telemetry& telemetry_;
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
// Per-job trace context captured at addJob() time and restored
|
||||
// on the worker thread when the job runs.
|
||||
struct JobContext
|
||||
{
|
||||
opentelemetry::context::Context traceCtx;
|
||||
};
|
||||
#endif
|
||||
|
||||
public:
|
||||
JobQueue(
|
||||
int threadCount,
|
||||
beast::insight::Collector::ptr const& collector,
|
||||
beast::Journal journal,
|
||||
Logs& logs,
|
||||
perf::PerfLog& perfLog,
|
||||
telemetry::Telemetry& telemetry); // New parameter
|
||||
// ...
|
||||
};
|
||||
```
|
||||
|
||||
```cpp
|
||||
// src/libxrpl/core/detail/JobQueue.cpp (modified — processTask)
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
void
|
||||
JobQueue::processTask(int instance)
|
||||
{
|
||||
// ... existing job dequeue logic ...
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
// Restore the trace context that was captured when the job was
|
||||
// enqueued. Any spans created inside the job body will become
|
||||
// children of the original caller's trace.
|
||||
auto token = opentelemetry::context::RuntimeContext::Attach(
|
||||
job.traceContext());
|
||||
|
||||
// Start an execution span if telemetry is enabled at runtime
|
||||
std::optional<telemetry::SpanGuard> guard;
|
||||
if (telemetry_.isEnabled())
|
||||
{
|
||||
guard.emplace(telemetry_.startSpan("job.execute"));
|
||||
guard->setAttribute("xrpl.job.type", to_string(job.type()));
|
||||
guard->setAttribute("xrpl.job.worker",
|
||||
static_cast<int64_t>(instance));
|
||||
}
|
||||
#endif
|
||||
// SpanGuard::span() uses the global Telemetry instance --
|
||||
// no Telemetry& member needed on JobQueue.
|
||||
auto span = telemetry::SpanGuard::span("job.execute");
|
||||
span.setAttribute("xrpl.job.type", to_string(job.type()));
|
||||
span.setAttribute("xrpl.job.worker",
|
||||
static_cast<int64_t>(instance));
|
||||
|
||||
try
|
||||
{
|
||||
job.execute();
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
if (guard)
|
||||
guard->setOk();
|
||||
#endif
|
||||
span.setOk();
|
||||
}
|
||||
catch (std::exception const& e)
|
||||
{
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
if (guard)
|
||||
guard->recordException(e);
|
||||
#endif
|
||||
span.recordException(e);
|
||||
JLOG(journal_.error()) << "Job execution failed: " << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
| [01-architecture-analysis.md](./01-architecture-analysis.md) | RPC request flow (§1.5), key trace points (§1.6), instrumentation priority (§1.7) |
|
||||
| [02-design-decisions.md](./02-design-decisions.md) | SDK selection (§2.1), exporter config (§2.2), span naming (§2.3), attribute schema (§2.4), coexistence with PerfLog/Insight (§2.6) |
|
||||
| [03-implementation-strategy.md](./03-implementation-strategy.md) | Directory structure (§3.1), key principles (§3.2), performance overhead (§3.3-3.6), conditional compilation (§3.7.3), code intrusiveness (§3.9) |
|
||||
| [04-code-samples.md](./04-code-samples.md) | Telemetry interface (§4.1), SpanGuard (§4.2), macros (§4.3), RPC instrumentation (§4.5.3) |
|
||||
| [04-code-samples.md](./04-code-samples.md) | Telemetry interface (§4.1), SpanGuard factory methods (§4.2-4.3), RPC instrumentation (§4.5.3) |
|
||||
| [05-configuration-reference.md](./05-configuration-reference.md) | rippled config (§5.1), config parser (§5.2), Application integration (§5.3), CMake (§5.4), Collector config (§5.5), Docker Compose (§5.6), Grafana (§5.8) |
|
||||
| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 1 core tasks (§6.2), Phase 2 RPC tasks (§6.3), quick wins (§6.10), definition of done (§6.11) |
|
||||
| [07-observability-backends.md](./07-observability-backends.md) | Tempo dev setup (§7.1), Grafana dashboards (§7.6), alert rules (§7.6.3) |
|
||||
@@ -147,9 +147,11 @@
|
||||
- Config parser: `Telemetry::Setup setup_Telemetry(Section const&, std::string const& nodePublicKey, std::string const& version);`
|
||||
|
||||
- Create `include/xrpl/telemetry/SpanGuard.h`:
|
||||
- RAII guard that takes an `nostd::shared_ptr<Span>`, creates a `Scope`, and calls `span->End()` in destructor.
|
||||
- Convenience: `setAttribute()`, `setOk()`, `setStatus()`, `addEvent()`, `recordException()`, `context()`
|
||||
- See [04-code-samples.md](./04-code-samples.md) §4.2 for the full implementation.
|
||||
- RAII guard with static factory methods (`rpcSpan()`, `txSpan()`, `consensusSpan()`, etc.) that access the global `Telemetry::getInstance()` singleton internally.
|
||||
- Uses pimpl idiom to hide all OTel types -- the public header has zero `opentelemetry/` includes.
|
||||
- Convenience instance methods: `setAttribute()`, `setOk()`, `setError()`, `addEvent()`, `recordException()`, `context()`, `discard()`
|
||||
- When `XRPL_ENABLE_TELEMETRY` is not defined, the entire class compiles to a no-op stub.
|
||||
- See [04-code-samples.md](./04-code-samples.md) §4.2-4.3 for the full API reference.
|
||||
|
||||
- Create `src/libxrpl/telemetry/NullTelemetry.cpp`:
|
||||
- Implements `Telemetry` with all no-ops.
|
||||
@@ -167,7 +169,7 @@
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.1](./04-code-samples.md) — Full `Telemetry` interface with `Setup` struct, lifecycle, tracer access, span creation, and component filtering methods
|
||||
- [04-code-samples.md §4.2](./04-code-samples.md) — Full `SpanGuard` RAII implementation and `NullSpanGuard` no-op class
|
||||
- [04-code-samples.md §4.2-4.3](./04-code-samples.md) — SpanGuard with factory methods, pimpl design, no-op stub, and discard support
|
||||
- [03-implementation-strategy.md §3.1](./03-implementation-strategy.md) — Directory structure: `include/xrpl/telemetry/` for headers, `src/libxrpl/telemetry/` for implementation
|
||||
- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation and zero-cost compile-time disabled pattern
|
||||
|
||||
@@ -287,47 +289,37 @@
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Create Instrumentation Macros
|
||||
## Task 5: Add SpanGuard Factory Methods
|
||||
|
||||
**Objective**: Define convenience macros that make instrumenting code one-liners, and that compile to zero-cost no-ops when telemetry is disabled.
|
||||
**Objective**: Add static factory methods to SpanGuard that provide type-safe, one-liner instrumentation and compile to zero-cost no-ops when telemetry is disabled. This replaces the earlier macro-based approach (`TracingInstrumentation.h` has been removed).
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Create `src/xrpld/telemetry/TracingInstrumentation.h`:
|
||||
- When `XRPL_ENABLE_TELEMETRY` is defined:
|
||||
- Update `include/xrpl/telemetry/SpanGuard.h`:
|
||||
- Add static factory methods that access the global `Telemetry::getInstance()` singleton and check the relevant component filter before creating a span:
|
||||
|
||||
```cpp
|
||||
#define XRPL_TRACE_SPAN(telemetry, name) \
|
||||
auto _xrpl_span_ = (telemetry).startSpan(name); \
|
||||
::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
|
||||
|
||||
#define XRPL_TRACE_RPC(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceRpc()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_SET_ATTR(key, value) \
|
||||
if (_xrpl_guard_.has_value()) { \
|
||||
_xrpl_guard_->setAttribute(key, value); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_EXCEPTION(e) \
|
||||
if (_xrpl_guard_.has_value()) { \
|
||||
_xrpl_guard_->recordException(e); \
|
||||
}
|
||||
// Each factory checks the global Telemetry instance internally.
|
||||
// No Telemetry& reference needed at the call site.
|
||||
auto span = telemetry::SpanGuard::rpcSpan("rpc.request");
|
||||
span.setAttribute("xrpl.rpc.command", command);
|
||||
span.setAttribute("xrpl.rpc.status", status);
|
||||
```
|
||||
|
||||
- When `XRPL_ENABLE_TELEMETRY` is NOT defined, all macros expand to `((void)0)`
|
||||
- Factory methods: `rpcSpan()`, `txSpan()`, `consensusSpan()`, `peerSpan()`, `ledgerSpan()`, `span()`
|
||||
- Use the pimpl idiom to hide all OTel types from the public header (zero `opentelemetry/` includes)
|
||||
- When `XRPL_ENABLE_TELEMETRY` is NOT defined, the entire class compiles to a no-op stub with empty inline method bodies
|
||||
|
||||
**Key new file**:
|
||||
- No separate `TracingInstrumentation.h` file is needed. All instrumentation call sites use `#include <xrpl/telemetry/SpanGuard.h>` directly.
|
||||
|
||||
- `src/xrpld/telemetry/TracingInstrumentation.h`
|
||||
**Key modified file**:
|
||||
|
||||
- `include/xrpl/telemetry/SpanGuard.h`
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.3](./04-code-samples.md) — Full macro definitions for `XRPL_TRACE_SPAN`, `XRPL_TRACE_RPC`, `XRPL_TRACE_CONSENSUS`, `XRPL_TRACE_SET_ATTR`, `XRPL_TRACE_EXCEPTION` with both enabled and disabled branches
|
||||
- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation pattern: compile-time `#ifndef` and runtime `shouldTrace*()` checks
|
||||
- [04-code-samples.md §4.3](./04-code-samples.md) — SpanGuard API reference: factory methods, usage patterns, compile-time disabled behavior, and discard support
|
||||
- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation pattern: factory methods handle compile-time and runtime checks internally
|
||||
- [03-implementation-strategy.md §3.9.7](./03-implementation-strategy.md) — Before/after code examples showing minimal intrusiveness (~1-3 lines per instrumentation point)
|
||||
|
||||
---
|
||||
@@ -341,17 +333,17 @@
|
||||
**What to do**:
|
||||
|
||||
- Edit `src/xrpld/rpc/detail/ServerHandler.cpp`:
|
||||
- `#include` the `TracingInstrumentation.h` header
|
||||
- `#include <xrpl/telemetry/SpanGuard.h>`
|
||||
- In `ServerHandler::onRequest(Session& session)`:
|
||||
- At the top of the method, add: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");`
|
||||
- After the RPC command name is extracted, set attribute: `XRPL_TRACE_SET_ATTR("xrpl.rpc.command", command);`
|
||||
- After the response status is known, set: `XRPL_TRACE_SET_ATTR("http.status_code", static_cast<int64_t>(statusCode));`
|
||||
- Wrap error paths with: `XRPL_TRACE_EXCEPTION(e);`
|
||||
- At the top of the method, add: `auto span = telemetry::SpanGuard::rpcSpan("rpc.request");`
|
||||
- After the RPC command name is extracted, set attribute: `span.setAttribute("xrpl.rpc.command", command);`
|
||||
- After the response status is known, set: `span.setAttribute("http.status_code", static_cast<int64_t>(statusCode));`
|
||||
- Wrap error paths with: `span.recordException(e);`
|
||||
- In `ServerHandler::processRequest(...)`:
|
||||
- Add a child span: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.process");`
|
||||
- Set method attribute: `XRPL_TRACE_SET_ATTR("xrpl.rpc.method", request_method);`
|
||||
- Add a child span: `auto span = telemetry::SpanGuard::rpcSpan("rpc.process");`
|
||||
- Set method attribute: `span.setAttribute("xrpl.rpc.method", request_method);`
|
||||
- In `ServerHandler::onWSMessage(...)` (WebSocket path):
|
||||
- Add: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.ws.message");`
|
||||
- Add: `auto span = telemetry::SpanGuard::rpcSpan("rpc.ws.message");`
|
||||
|
||||
- The goal is to see spans like:
|
||||
```
|
||||
@@ -366,7 +358,7 @@
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.5.3](./04-code-samples.md) — Complete `ServerHandler::onRequest()` instrumented code sample with W3C header extraction, span creation, attribute setting, and error handling
|
||||
- [04-code-samples.md §4.5.3](./04-code-samples.md) — Complete `ServerHandler::onRequest()` instrumented code sample using SpanGuard factory methods
|
||||
- [01-architecture-analysis.md §1.5](./01-architecture-analysis.md) — RPC request flow diagram: HTTP request -> attributes -> jobqueue.enqueue -> rpc.command -> response
|
||||
- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — Key trace points table: `rpc.request` in `ServerHandler.cpp::onRequest()` (Priority: High)
|
||||
- [02-design-decisions.md §2.3](./02-design-decisions.md) — Span naming convention: `rpc.request`, `rpc.command.*`
|
||||
@@ -382,15 +374,15 @@
|
||||
**What to do**:
|
||||
|
||||
- Edit `src/xrpld/rpc/detail/RPCHandler.cpp`:
|
||||
- `#include` the `TracingInstrumentation.h` header
|
||||
- `#include <xrpl/telemetry/SpanGuard.h>`
|
||||
- In `doCommand(RPC::JsonContext& context, Json::Value& result)`:
|
||||
- At the top: `XRPL_TRACE_RPC(context.app.getTelemetry(), "rpc.command." + context.method);`
|
||||
- At the top: `auto span = telemetry::SpanGuard::rpcSpan("rpc.command." + context.method);`
|
||||
- Set attributes:
|
||||
- `XRPL_TRACE_SET_ATTR("xrpl.rpc.command", context.method);`
|
||||
- `XRPL_TRACE_SET_ATTR("xrpl.rpc.version", static_cast<int64_t>(context.apiVersion));`
|
||||
- `XRPL_TRACE_SET_ATTR("xrpl.rpc.role", (context.role == Role::ADMIN) ? "admin" : "user");`
|
||||
- On success: `XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");`
|
||||
- On error: `XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error");` and set the error message
|
||||
- `span.setAttribute("xrpl.rpc.command", context.method);`
|
||||
- `span.setAttribute("xrpl.rpc.version", static_cast<int64_t>(context.apiVersion));`
|
||||
- `span.setAttribute("xrpl.rpc.role", (context.role == Role::ADMIN) ? "admin" : "user");`
|
||||
- On success: `span.setAttribute("xrpl.rpc.status", "success");`
|
||||
- On error: `span.setAttribute("xrpl.rpc.status", "error");` and set the error message
|
||||
|
||||
- After this, traces in Tempo/Grafana should look like:
|
||||
```
|
||||
@@ -553,7 +545,7 @@
|
||||
| 2 | Core Telemetry interface + NullImpl | 3 | 0 | 1 |
|
||||
| 3 | OTel-backed Telemetry implementation | 2 | 1 | 1, 2 |
|
||||
| 4 | Application lifecycle integration | 0 | 3 | 2, 3 |
|
||||
| 5 | Instrumentation macros | 1 | 0 | 2 |
|
||||
| 5 | SpanGuard factory methods | 0 | 1 | 2 |
|
||||
| 6 | Instrument RPC ServerHandler | 0 | 1 | 4, 5 |
|
||||
| 7 | Instrument RPC command execution | 0 | 1 | 4, 5 |
|
||||
| 8 | End-to-end verification | 0 | 0 | 0-7 |
|
||||
@@ -631,6 +623,6 @@ Issues encountered during POC implementation that inform future work:
|
||||
| Conan package only builds OTLP HTTP exporter, not gRPC | Switched from gRPC to HTTP exporter (`localhost:4318/v1/traces`) | HTTP exporter is the default; gRPC requires custom Conan profile |
|
||||
| CMake target `opentelemetry-cpp::api` etc. don't exist in Conan package | Use umbrella target `opentelemetry-cpp::opentelemetry-cpp` | Conan targets differ from upstream CMake targets |
|
||||
| OTel Collector `logging` exporter deprecated | Renamed to `debug` exporter | Use `debug` in all collector configs going forward |
|
||||
| Macro parameter `telemetry` collided with `::xrpl::telemetry::` namespace | Renamed macro params to `_tel_obj_`, `_span_name_` | Avoid common words as macro parameter names |
|
||||
| Macro parameter `telemetry` collided with `::xrpl::telemetry::` namespace | Replaced macros with SpanGuard factory methods (no macros needed) | Factory methods avoid macro hygiene issues entirely |
|
||||
| `opentelemetry::trace::Scope` creates new context on move | Store scope as member, create once in constructor | SpanGuard move semantics need care with Scope lifecycle |
|
||||
| `TracerProviderFactory::Create` returns `unique_ptr<sdk::TracerProvider>`, not `nostd::shared_ptr` | Use `std::shared_ptr` member, wrap in `nostd::shared_ptr` for global provider | OTel SDK factory return types don't match API provider types |
|
||||
|
||||
@@ -1552,31 +1552,42 @@ validators.txt
|
||||
#
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
#
|
||||
# The OpenTelemetry Collector endpoint (OTLP/HTTP). Default: http://localhost:4318/v1/traces.
|
||||
# The OTLP/HTTP exporter endpoint. The server sends trace data as
|
||||
# protobuf-encoded HTTP POST requests to this URL.
|
||||
# Default: http://localhost:4318/v1/traces.
|
||||
#
|
||||
# sampling_ratio=1.0
|
||||
#
|
||||
# Head-based sampling ratio: the fraction of traces to keep, decided at
|
||||
# span creation time (before the trace completes). Values in [0.0, 1.0].
|
||||
# Head-based sampling ratio using TraceIdRatioBasedSampler. The decision
|
||||
# to record or drop a trace is made at span creation time, before the
|
||||
# trace completes, based on the trace ID. Values in [0.0, 1.0].
|
||||
# 1.0 = trace everything, 0.1 = sample ~10% of traces. Default: 1.0.
|
||||
# For tail-based (post-hoc) filtering — where you decide to drop a span
|
||||
# after inspecting its content — use SpanGuard::discard() in code.
|
||||
#
|
||||
# trace_rpc=1
|
||||
#
|
||||
# Enable RPC request tracing. Default: 1.
|
||||
# Enable tracing for JSON-RPC and WebSocket API request handling —
|
||||
# command parsing, execution, and response serialization. Default: 1.
|
||||
#
|
||||
# trace_transactions=1
|
||||
#
|
||||
# Enable transaction lifecycle tracing. Default: 1.
|
||||
# Enable tracing for the transaction lifecycle — submission, validation,
|
||||
# application to ledgers, and final disposition. Default: 1.
|
||||
#
|
||||
# trace_consensus=1
|
||||
#
|
||||
# Enable consensus round tracing. Default: 1.
|
||||
# Enable tracing for the consensus round lifecycle — proposals,
|
||||
# validations, mode changes, and ledger acceptance. Default: 1.
|
||||
#
|
||||
# trace_peer=0
|
||||
#
|
||||
# Enable peer message tracing (high volume). Default: 0.
|
||||
# Enable tracing for peer-to-peer protocol messages — overlay message
|
||||
# send/receive, peer handshakes, and routing. High volume; disabled
|
||||
# by default. Default: 0.
|
||||
#
|
||||
# trace_ledger=1
|
||||
#
|
||||
# Enable ledger close/accept tracing. Default: 1.
|
||||
# Enable tracing for ledger close and accept operations — ledger
|
||||
# building, state hashing, and write-back to the node store. Default: 1.
|
||||
#
|
||||
|
||||
@@ -203,6 +203,7 @@ words:
|
||||
- permdex
|
||||
- perminute
|
||||
- permissioned
|
||||
- pimpl
|
||||
- pointee
|
||||
- populator
|
||||
- preauth
|
||||
|
||||
@@ -67,9 +67,14 @@ services:
|
||||
networks:
|
||||
- xrpld-telemetry
|
||||
|
||||
# Named volume for Tempo trace storage (WAL and compacted blocks).
|
||||
# Data persists across container restarts. Remove with:
|
||||
# docker compose -f docker/telemetry/docker-compose.yml down -v
|
||||
volumes:
|
||||
tempo-data:
|
||||
|
||||
# Isolated bridge network so services communicate by container name
|
||||
# (e.g., the collector reaches Tempo at http://tempo:4317).
|
||||
networks:
|
||||
xrpld-telemetry:
|
||||
driver: bridge
|
||||
|
||||
@@ -2,19 +2,35 @@
|
||||
|
||||
/** RAII guard for OpenTelemetry trace spans.
|
||||
|
||||
Wraps an OTel Span and Scope together. On construction, the span is
|
||||
activated on the current thread's context (via Scope). On destruction,
|
||||
the span is ended and the previous context is restored.
|
||||
Wraps an OTel Span and Scope behind the pimpl idiom so that no
|
||||
opentelemetry headers are exposed in this public header. When
|
||||
XRPL_ENABLE_TELEMETRY is not defined, SpanGuard is an empty class
|
||||
with all-inline no-op methods — zero overhead, zero dependencies.
|
||||
|
||||
Dependency diagram:
|
||||
|
||||
+------------------------------------+
|
||||
| SpanGuard |
|
||||
+------------------------------------+
|
||||
| - span_ : shared_ptr<Span> |
|
||||
| - scope_ : Scope |
|
||||
+------------------------------------+
|
||||
| uses
|
||||
+-----------------------------------------+
|
||||
| SpanGuard |
|
||||
+-----------------------------------------+
|
||||
| - impl_ : unique_ptr<Impl> (pimpl) |
|
||||
+-----------------------------------------+
|
||||
| + rpcSpan(name) : SpanGuard [static] |
|
||||
| + txSpan(name) : SpanGuard [static] |
|
||||
| + consensusSpan(name) [static] |
|
||||
| + peerSpan(name) [static] |
|
||||
| + ledgerSpan(name) [static] |
|
||||
| + span(name) [static] |
|
||||
| + childSpan(name) : SpanGuard |
|
||||
| + linkedSpan(name) : SpanGuard |
|
||||
| + captureContext() : SpanContext |
|
||||
| + setAttribute(key, value) |
|
||||
| + setOk() / setError(desc) |
|
||||
| + addEvent(name) |
|
||||
| + recordException(e) |
|
||||
| + discard() |
|
||||
| + operator bool() |
|
||||
+-----------------------------------------+
|
||||
| hides (pimpl)
|
||||
+-------+-------+
|
||||
| |
|
||||
+--------+ +-------------+
|
||||
@@ -23,223 +39,393 @@
|
||||
| | | movable) |
|
||||
+--------+ +-------------+
|
||||
|
||||
Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also
|
||||
be stored in std::optional for conditional tracing (move-constructible).
|
||||
|
||||
Only compiled when XRPL_ENABLE_TELEMETRY is defined.
|
||||
Static factory methods access the global Telemetry instance
|
||||
internally (via Telemetry::getInstance()), check whether tracing
|
||||
is enabled for the requested subsystem, and return either an
|
||||
active guard or a null (no-op) guard. Callers never need a
|
||||
Telemetry reference.
|
||||
|
||||
Usage examples:
|
||||
|
||||
1. Basic RAII tracing:
|
||||
1. Basic RPC tracing (factory method):
|
||||
@code
|
||||
{
|
||||
SpanGuard guard(telemetry.startSpan("rpc.command.submit"));
|
||||
guard.setAttribute("xrpl.rpc.command", "submit");
|
||||
// ... span is active on this thread's context
|
||||
} // span ended, previous context restored
|
||||
auto span = SpanGuard::rpcSpan("rpc.command.submit");
|
||||
span.setAttribute("xrpl.rpc.command", "submit");
|
||||
span.setAttribute("xrpl.rpc.status", "success");
|
||||
// span ended automatically on scope exit
|
||||
@endcode
|
||||
|
||||
2. Conditional tracing with std::optional:
|
||||
2. Error recording:
|
||||
@code
|
||||
std::optional<SpanGuard> guard;
|
||||
if (telemetry.isEnabled() && telemetry.shouldTraceRpc())
|
||||
guard.emplace(telemetry.startSpan("rpc.request"));
|
||||
// ... guard may or may not hold a span
|
||||
@endcode
|
||||
|
||||
3. Error recording:
|
||||
@code
|
||||
SpanGuard guard(telemetry.startSpan("rpc.command.submit"));
|
||||
auto span = SpanGuard::rpcSpan("rpc.command.submit");
|
||||
try {
|
||||
// ... do work
|
||||
guard.setOk();
|
||||
doWork();
|
||||
span.setOk();
|
||||
} catch (std::exception const& e) {
|
||||
guard.recordException(e); // sets status to error
|
||||
span.recordException(e);
|
||||
}
|
||||
@endcode
|
||||
|
||||
@note Thread safety: A SpanGuard must only be used on the thread where
|
||||
it was constructed (the Scope binds to the thread-local context stack).
|
||||
Use context() to propagate the trace to other threads.
|
||||
3. Cross-thread context propagation:
|
||||
@code
|
||||
// Thread A: create span and capture context
|
||||
auto span = SpanGuard::consensusSpan("consensus.round");
|
||||
auto ctx = span.captureContext();
|
||||
|
||||
@note Limitation: Move assignment is deleted because re-scoping a span
|
||||
mid-flight would corrupt the context stack. Only move construction is
|
||||
supported (for std::optional emplacement).
|
||||
// Thread B: create child with captured context
|
||||
auto child = SpanGuard::childSpan("consensus.accept", ctx);
|
||||
@endcode
|
||||
|
||||
4. Conditional check (rarely needed — methods are no-ops on null):
|
||||
@code
|
||||
auto span = SpanGuard::rpcSpan("rpc.request");
|
||||
if (span) {
|
||||
// expensive attribute computation only when active
|
||||
span.setAttribute("xrpl.rpc.payload_size", computeSize());
|
||||
}
|
||||
@endcode
|
||||
|
||||
5. Tail-based filtering via discard():
|
||||
@code
|
||||
auto span = SpanGuard::txSpan("tx.process");
|
||||
auto result = preflight(tx);
|
||||
if (result != tesSUCCESS) {
|
||||
span.discard(); // drop span, never exported
|
||||
return result;
|
||||
}
|
||||
@endcode
|
||||
|
||||
@note Thread safety: A SpanGuard must only be used on the thread
|
||||
where it was constructed (the internal Scope binds to the
|
||||
thread-local context stack). Use captureContext() to propagate
|
||||
the trace to other threads.
|
||||
|
||||
@note Move semantics: Move construction transfers ownership of
|
||||
the pimpl pointer — no double-Scope issues. Move assignment is
|
||||
deleted to prevent re-scoping mid-flight.
|
||||
|
||||
@note Known limitations:
|
||||
- Attributes cannot be removed per the OTel spec; use
|
||||
setAttribute with an empty value as a convention.
|
||||
- Raw access to the underlying OTel Span is intentionally not
|
||||
exposed — all interaction goes through the public methods.
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/telemetry/DiscardFlag.h>
|
||||
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/** Opaque wrapper for an OTel context snapshot.
|
||||
|
||||
Used to propagate trace context across threads. Created by
|
||||
SpanGuard::captureContext(), consumed by SpanGuard::childSpan()
|
||||
or SpanGuard::linkedSpan() with an explicit parent/link context.
|
||||
*/
|
||||
class SpanContext
|
||||
{
|
||||
friend class SpanGuard;
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
struct Impl;
|
||||
std::shared_ptr<Impl> impl_;
|
||||
explicit SpanContext(std::shared_ptr<Impl> impl);
|
||||
#endif
|
||||
|
||||
public:
|
||||
SpanContext() = default;
|
||||
|
||||
/** @return true if this context holds a valid trace context. */
|
||||
bool
|
||||
isValid() const;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Real implementation (pimpl, compiled in SpanGuard.cpp)
|
||||
// ---------------------------------------------------------------------------
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
/** RAII wrapper that activates a span on construction and ends it on
|
||||
destruction. Non-copyable but move-constructible so it can be held
|
||||
in std::optional for conditional tracing.
|
||||
destruction. All OTel types are hidden behind the Impl pointer.
|
||||
Non-copyable, move-constructible.
|
||||
*/
|
||||
class SpanGuard
|
||||
{
|
||||
/** The OTel span being guarded. Set to nullptr after move. */
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span_;
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
|
||||
/** Scope that activates span_ on the current thread's context stack. */
|
||||
opentelemetry::trace::Scope scope_;
|
||||
explicit SpanGuard(std::unique_ptr<Impl> impl);
|
||||
|
||||
public:
|
||||
/** Construct a guard that activates @p span on the current context.
|
||||
/** Construct a null (no-op) guard. All methods are safe to call. */
|
||||
SpanGuard();
|
||||
~SpanGuard();
|
||||
|
||||
@param span The span to guard. Ended in the destructor.
|
||||
*/
|
||||
explicit SpanGuard(opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span)
|
||||
: span_(std::move(span)), scope_(span_)
|
||||
{
|
||||
}
|
||||
|
||||
/** Non-copyable. Move-constructible to support std::optional.
|
||||
|
||||
The move constructor creates a new Scope from the transferred span,
|
||||
because Scope is not movable.
|
||||
*/
|
||||
SpanGuard(SpanGuard&& other) noexcept;
|
||||
SpanGuard&
|
||||
operator=(SpanGuard&&) = delete;
|
||||
SpanGuard(SpanGuard const&) = delete;
|
||||
SpanGuard&
|
||||
operator=(SpanGuard const&) = delete;
|
||||
SpanGuard(SpanGuard&& other) noexcept : span_(std::move(other.span_)), scope_(span_)
|
||||
{
|
||||
other.span_ = nullptr;
|
||||
}
|
||||
SpanGuard&
|
||||
operator=(SpanGuard&&) = delete;
|
||||
|
||||
~SpanGuard()
|
||||
{
|
||||
if (span_)
|
||||
span_->End();
|
||||
}
|
||||
// --- Static factory methods ----------------------------------------
|
||||
// Each checks the global Telemetry instance and the corresponding
|
||||
// shouldTrace*() flag. Returns a null guard if tracing is off.
|
||||
|
||||
/** @return A mutable reference to the underlying span. */
|
||||
opentelemetry::trace::Span&
|
||||
span()
|
||||
{
|
||||
return *span_;
|
||||
}
|
||||
/** Create an unconditional span (always created if telemetry is on). */
|
||||
static SpanGuard
|
||||
span(std::string_view name);
|
||||
|
||||
/** @return A const reference to the underlying span. */
|
||||
opentelemetry::trace::Span const&
|
||||
span() const
|
||||
{
|
||||
return *span_;
|
||||
}
|
||||
/** Create a span guarded by shouldTraceRpc(). */
|
||||
static SpanGuard
|
||||
rpcSpan(std::string_view name);
|
||||
|
||||
/** Mark the span status as OK. */
|
||||
/** Create a span guarded by shouldTraceTransactions(). */
|
||||
static SpanGuard
|
||||
txSpan(std::string_view name);
|
||||
|
||||
/** Create a span guarded by shouldTraceConsensus(). */
|
||||
static SpanGuard
|
||||
consensusSpan(std::string_view name);
|
||||
|
||||
/** Create a span guarded by shouldTracePeer(). */
|
||||
static SpanGuard
|
||||
peerSpan(std::string_view name);
|
||||
|
||||
/** Create a span guarded by shouldTraceLedger(). */
|
||||
static SpanGuard
|
||||
ledgerSpan(std::string_view name);
|
||||
|
||||
// --- Child / linked span creation ----------------------------------
|
||||
|
||||
/** Create a child span parented to this guard's active context.
|
||||
@param name Span name for the child.
|
||||
@return A new guard, or null if this guard is inactive.
|
||||
*/
|
||||
SpanGuard
|
||||
childSpan(std::string_view name) const;
|
||||
|
||||
/** Create a child span parented to an explicit captured context.
|
||||
@param name Span name for the child.
|
||||
@param parentCtx Context captured via captureContext().
|
||||
@return A new guard, or null if parentCtx is invalid.
|
||||
*/
|
||||
static SpanGuard
|
||||
childSpan(std::string_view name, SpanContext const& parentCtx);
|
||||
|
||||
/** Create a span linked (follows-from) to this guard's span.
|
||||
The new span is NOT a child — it starts a new sub-tree but
|
||||
carries a causal link to this span.
|
||||
@param name Span name for the linked span.
|
||||
@return A new guard, or null if this guard is inactive.
|
||||
*/
|
||||
SpanGuard
|
||||
linkedSpan(std::string_view name) const;
|
||||
|
||||
/** Create a span linked to an explicit captured context.
|
||||
@param name Span name for the linked span.
|
||||
@param linkCtx Context to link from.
|
||||
@return A new guard, or null if linkCtx is invalid.
|
||||
*/
|
||||
static SpanGuard
|
||||
linkedSpan(std::string_view name, SpanContext const& linkCtx);
|
||||
|
||||
// --- Context capture -----------------------------------------------
|
||||
|
||||
/** Snapshot the current thread's OTel context for cross-thread use.
|
||||
@return An opaque SpanContext, or an invalid one if null guard.
|
||||
*/
|
||||
SpanContext
|
||||
captureContext() const;
|
||||
|
||||
// --- Attribute setters (explicit overloads, no OTel types) ---------
|
||||
|
||||
/** Set a string attribute. No-op on a null guard. */
|
||||
void
|
||||
setOk()
|
||||
{
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kOk);
|
||||
}
|
||||
setAttribute(std::string_view key, std::string_view value);
|
||||
|
||||
/** Set an explicit status code on the span.
|
||||
/** Set a string attribute (C-string overload). No-op on a null guard. */
|
||||
void
|
||||
setAttribute(std::string_view key, char const* value);
|
||||
|
||||
@param code The OTel status code.
|
||||
@param description Optional human-readable status description.
|
||||
/** Set an integer attribute. No-op on a null guard. */
|
||||
void
|
||||
setAttribute(std::string_view key, std::int64_t value);
|
||||
|
||||
/** Set a floating-point attribute. No-op on a null guard. */
|
||||
void
|
||||
setAttribute(std::string_view key, double value);
|
||||
|
||||
/** Set a boolean attribute. No-op on a null guard. */
|
||||
void
|
||||
setAttribute(std::string_view key, bool value);
|
||||
|
||||
// --- Status / events -----------------------------------------------
|
||||
|
||||
/** Mark the span status as OK. No-op on a null guard. */
|
||||
void
|
||||
setOk();
|
||||
|
||||
/** Mark the span status as error. No-op on a null guard.
|
||||
@param description Optional human-readable error description.
|
||||
*/
|
||||
void
|
||||
setStatus(opentelemetry::trace::StatusCode code, std::string_view description = "")
|
||||
{
|
||||
span_->SetStatus(code, std::string(description));
|
||||
}
|
||||
|
||||
/** Set a key-value attribute on the span.
|
||||
|
||||
@param key Attribute name (e.g. "xrpl.rpc.command").
|
||||
@param value Attribute value (string, int, bool, etc.).
|
||||
*/
|
||||
template <typename T>
|
||||
void
|
||||
setAttribute(std::string_view key, T&& value)
|
||||
{
|
||||
span_->SetAttribute(
|
||||
opentelemetry::nostd::string_view(key.data(), key.size()), std::forward<T>(value));
|
||||
}
|
||||
|
||||
/** Add a named event to the span's timeline.
|
||||
setError(std::string_view description = "");
|
||||
|
||||
/** Add a named event to the span's timeline. No-op on a null guard.
|
||||
@param name Event name.
|
||||
*/
|
||||
void
|
||||
addEvent(std::string_view name)
|
||||
{
|
||||
span_->AddEvent(std::string(name));
|
||||
}
|
||||
addEvent(std::string_view name);
|
||||
|
||||
/** Record an exception as a span event following OTel semantic
|
||||
conventions, and mark the span status as error.
|
||||
|
||||
No-op on a null guard.
|
||||
@param e The exception to record.
|
||||
*/
|
||||
void
|
||||
recordException(std::exception const& e)
|
||||
{
|
||||
span_->AddEvent(
|
||||
"exception",
|
||||
{{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}});
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
|
||||
}
|
||||
|
||||
/** Return the current OTel context.
|
||||
|
||||
Useful for creating child spans on a different thread by passing
|
||||
this context to Telemetry::startSpan(name, parentContext).
|
||||
*/
|
||||
opentelemetry::context::Context
|
||||
context() const
|
||||
{
|
||||
return opentelemetry::context::RuntimeContext::GetCurrent();
|
||||
}
|
||||
recordException(std::exception const& e);
|
||||
|
||||
/** Mark this span for discard and end it immediately.
|
||||
|
||||
Sets the tl_discardCurrentSpan thread-local flag before calling
|
||||
End(). The OTel SDK calls FilteringSpanProcessor::OnEnd()
|
||||
synchronously on the same thread, where the flag is checked and
|
||||
cleared. The span is dropped before entering the batch export
|
||||
queue — never sent over the network or stored.
|
||||
|
||||
After calling discard(), the guard is inert — the destructor will
|
||||
not call End() again.
|
||||
|
||||
Typical usage:
|
||||
@code
|
||||
SpanGuard guard(telemetry.startSpan("tx.process"));
|
||||
auto result = preflight(tx);
|
||||
if (result != tesSUCCESS)
|
||||
{
|
||||
guard.discard();
|
||||
return result;
|
||||
}
|
||||
@endcode
|
||||
The FilteringSpanProcessor drops the span before it enters the
|
||||
batch export queue. After discard(), the guard is inert.
|
||||
*/
|
||||
void
|
||||
discard();
|
||||
|
||||
/** @return true if this guard holds an active span. */
|
||||
explicit
|
||||
operator bool() const;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// No-op stub (all inline, zero overhead, no OTel dependency)
|
||||
// ---------------------------------------------------------------------------
|
||||
#else // XRPL_ENABLE_TELEMETRY not defined
|
||||
|
||||
class SpanGuard
|
||||
{
|
||||
public:
|
||||
SpanGuard() = default;
|
||||
~SpanGuard() = default;
|
||||
SpanGuard(SpanGuard&&) noexcept = default;
|
||||
SpanGuard&
|
||||
operator=(SpanGuard&&) = delete;
|
||||
SpanGuard(SpanGuard const&) = delete;
|
||||
SpanGuard&
|
||||
operator=(SpanGuard const&) = delete;
|
||||
|
||||
static SpanGuard
|
||||
span(std::string_view)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
rpcSpan(std::string_view)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
txSpan(std::string_view)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
consensusSpan(std::string_view)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
peerSpan(std::string_view)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
ledgerSpan(std::string_view)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
SpanGuard
|
||||
childSpan(std::string_view) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
childSpan(std::string_view, SpanContext const&)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
SpanGuard
|
||||
linkedSpan(std::string_view) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
static SpanGuard
|
||||
linkedSpan(std::string_view, SpanContext const&)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
SpanContext
|
||||
captureContext() const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
void
|
||||
setAttribute(std::string_view, std::string_view)
|
||||
{
|
||||
}
|
||||
void
|
||||
setAttribute(std::string_view, char const*)
|
||||
{
|
||||
}
|
||||
void
|
||||
setAttribute(std::string_view, std::int64_t)
|
||||
{
|
||||
}
|
||||
void
|
||||
setAttribute(std::string_view, double)
|
||||
{
|
||||
}
|
||||
void
|
||||
setAttribute(std::string_view, bool)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
setOk()
|
||||
{
|
||||
}
|
||||
void
|
||||
setError(std::string_view = "")
|
||||
{
|
||||
}
|
||||
void
|
||||
addEvent(std::string_view)
|
||||
{
|
||||
}
|
||||
void
|
||||
recordException(std::exception const&)
|
||||
{
|
||||
}
|
||||
void
|
||||
discard()
|
||||
{
|
||||
if (span_)
|
||||
{
|
||||
tl_discardCurrentSpan = true;
|
||||
span_->End();
|
||||
span_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
explicit
|
||||
operator bool() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
|
||||
@@ -89,7 +89,34 @@ namespace telemetry {
|
||||
|
||||
class Telemetry
|
||||
{
|
||||
/** Global singleton pointer, set by start()/stop() in the active
|
||||
implementation. Allows SpanGuard factory methods to access the
|
||||
Telemetry instance without callers passing it explicitly.
|
||||
@see setInstance(), getInstance()
|
||||
*/
|
||||
inline static Telemetry* instance_ = nullptr;
|
||||
|
||||
public:
|
||||
/** Get the global Telemetry instance.
|
||||
Returns whatever pointer was most recently stored by setInstance().
NOTE(review): instance_ is a plain pointer read here without any
synchronization; confirm start()/stop() cannot run concurrently with
callers on other threads.
@return Pointer to the active instance, or nullptr if not started.
|
||||
*/
|
||||
static Telemetry*
|
||||
getInstance()
|
||||
{
|
||||
return instance_;
|
||||
}
|
||||
|
||||
/** Set the global Telemetry instance.
|
||||
Called by start()/stop() in concrete implementations.
|
||||
Tests can call this with a mock to override the global instance.
|
||||
The pointer is stored as-is; no ownership is taken here.
NOTE(review): the store is unsynchronized; confirm no other thread
can be inside getInstance() while this runs.
@param t Pointer to the Telemetry instance, or nullptr to clear.
|
||||
*/
|
||||
static void
|
||||
setInstance(Telemetry* t)
|
||||
{
|
||||
instance_ = t;
|
||||
}
|
||||
|
||||
/** Configuration parsed from the [telemetry] section of xrpld.cfg.
|
||||
|
||||
All fields have sensible defaults so the section can be minimal
|
||||
@@ -119,7 +146,12 @@ public:
|
||||
/** Path to a CA certificate bundle for TLS verification. */
|
||||
std::string tlsCertPath;
|
||||
|
||||
/** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything. */
|
||||
/** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything.
|
||||
This is a head-based (pre-decision) sampler using
|
||||
TraceIdRatioBasedSampler — the decision to record or drop a
|
||||
trace is made before the root span starts. For post-hoc
|
||||
(tail-based) filtering, see SpanGuard::discard().
|
||||
*/
|
||||
double samplingRatio = 1.0;
|
||||
|
||||
/** Maximum number of spans per batch export. */
|
||||
@@ -224,7 +256,12 @@ public:
|
||||
OpenTelemetry's context propagation.
|
||||
|
||||
@param name Span name (typically "rpc.command.<cmd>").
|
||||
@param kind The span kind (defaults to kInternal).
|
||||
@param kind The span kind (defaults to kInternal). Possible values:
|
||||
- kInternal: default, in-process operation
|
||||
- kServer: incoming synchronous request (e.g. RPC)
|
||||
- kClient: outgoing synchronous request
|
||||
- kProducer: async message send (e.g. peer broadcast)
|
||||
- kConsumer: async message receive
|
||||
@return A shared pointer to the new Span.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
|
||||
@@ -39,11 +39,13 @@ public:
|
||||
// Registers this object as the global Telemetry instance.
void
|
||||
start() override
|
||||
{
|
||||
Telemetry::setInstance(this);
|
||||
}
|
||||
|
||||
// Clears the global Telemetry instance pointer.
void
|
||||
stop() override
|
||||
{
|
||||
Telemetry::setInstance(nullptr);
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
332
src/libxrpl/telemetry/SpanGuard.cpp
Normal file
332
src/libxrpl/telemetry/SpanGuard.cpp
Normal file
@@ -0,0 +1,332 @@
|
||||
/** Pimpl implementation for SpanGuard and SpanContext.
|
||||
|
||||
All OpenTelemetry SDK types are confined to this translation unit.
|
||||
The public SpanGuard.h header contains only standard-library types
|
||||
and forward-declares the Impl struct.
|
||||
|
||||
Static factory methods (rpcSpan, txSpan, etc.) access the global
|
||||
Telemetry instance via Telemetry::getInstance(), check the relevant
|
||||
shouldTrace*() flag, and return either an active guard with a real
|
||||
Span+Scope or a null guard whose methods are all no-ops.
|
||||
|
||||
The Impl struct holds the OTel Span (shared_ptr) and Scope.
|
||||
Scope is non-movable, but since Impl lives behind a unique_ptr,
|
||||
SpanGuard's move constructor simply transfers the pointer — no
|
||||
double-Scope issues.
|
||||
|
||||
@see SpanGuard (SpanGuard.h), Telemetry (Telemetry.h),
|
||||
FilteringSpanProcessor (Telemetry.cpp)
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/telemetry/DiscardFlag.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/context.h>
|
||||
#include <opentelemetry/trace/provider.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
#include <opentelemetry/trace/span_startoptions.h>
|
||||
#include <opentelemetry/trace/tracer.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
namespace otel_trace = opentelemetry::trace;
|
||||
|
||||
// ===== SpanContext::Impl ===================================================
|
||||
|
||||
struct SpanContext::Impl
|
||||
{
|
||||
opentelemetry::context::Context ctx;
|
||||
|
||||
explicit Impl(opentelemetry::context::Context c) : ctx(std::move(c))
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
SpanContext::SpanContext(std::shared_ptr<Impl> impl) : impl_(std::move(impl))
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
SpanContext::isValid() const
|
||||
{
|
||||
return impl_ != nullptr;
|
||||
}
|
||||
|
||||
// ===== SpanGuard::Impl ====================================================
|
||||
|
||||
struct SpanGuard::Impl
|
||||
{
|
||||
/** The OTel span being guarded. Set to nullptr after discard(). */
|
||||
opentelemetry::nostd::shared_ptr<otel_trace::Span> span;
|
||||
|
||||
/** Scope that activates span on the current thread's context stack. */
|
||||
otel_trace::Scope scope;
|
||||
|
||||
explicit Impl(opentelemetry::nostd::shared_ptr<otel_trace::Span> s)
|
||||
: span(std::move(s)), scope(span)
|
||||
{
|
||||
}
|
||||
|
||||
~Impl()
|
||||
{
|
||||
if (span)
|
||||
span->End();
|
||||
}
|
||||
|
||||
Impl(Impl const&) = delete;
|
||||
Impl&
|
||||
operator=(Impl const&) = delete;
|
||||
Impl(Impl&&) = delete;
|
||||
Impl&
|
||||
operator=(Impl&&) = delete;
|
||||
};
|
||||
|
||||
// ===== SpanGuard core lifecycle ============================================
|
||||
|
||||
// Defaulted out of line, after SpanGuard::Impl is a complete type in
// this translation unit.
SpanGuard::SpanGuard() = default;
|
||||
SpanGuard::~SpanGuard() = default;
|
||||
SpanGuard::SpanGuard(SpanGuard&&) noexcept = default;
|
||||
|
||||
SpanGuard::SpanGuard(std::unique_ptr<Impl> impl) : impl_(std::move(impl))
|
||||
{
|
||||
}
|
||||
|
||||
/** @return true while this guard owns span state, false for a null guard. */
SpanGuard::
operator bool() const
{
    return static_cast<bool>(impl_);
}
|
||||
|
||||
// ===== Static factory methods ==============================================
|
||||
|
||||
/** Start a general-purpose span.

    @param name Span name.
    @return An active guard when a Telemetry instance is registered and
            enabled; otherwise a null guard.
*/
SpanGuard
SpanGuard::span(std::string_view name)
{
    Telemetry* const telemetry = Telemetry::getInstance();
    bool const active = telemetry != nullptr && telemetry->isEnabled();
    if (active)
    {
        auto state = std::make_unique<Impl>(telemetry->startSpan(name));
        return SpanGuard(std::move(state));
    }
    return {};
}
|
||||
|
||||
/** Start a span gated on shouldTraceRpc().

    @param name Span name.
    @return An active guard when a Telemetry instance is registered,
            enabled, and shouldTraceRpc() is true; otherwise a null guard.
*/
SpanGuard
SpanGuard::rpcSpan(std::string_view name)
{
    Telemetry* const telemetry = Telemetry::getInstance();
    bool const active =
        telemetry != nullptr && telemetry->isEnabled() && telemetry->shouldTraceRpc();
    if (active)
    {
        auto state = std::make_unique<Impl>(telemetry->startSpan(name));
        return SpanGuard(std::move(state));
    }
    return {};
}
|
||||
|
||||
/** Start a span gated on shouldTraceTransactions().

    @param name Span name.
    @return An active guard when a Telemetry instance is registered,
            enabled, and shouldTraceTransactions() is true; otherwise a
            null guard.
*/
SpanGuard
SpanGuard::txSpan(std::string_view name)
{
    Telemetry* const telemetry = Telemetry::getInstance();
    bool const active =
        telemetry != nullptr && telemetry->isEnabled() && telemetry->shouldTraceTransactions();
    if (active)
    {
        auto state = std::make_unique<Impl>(telemetry->startSpan(name));
        return SpanGuard(std::move(state));
    }
    return {};
}
|
||||
|
||||
/** Start a span gated on shouldTraceConsensus().

    @param name Span name.
    @return An active guard when a Telemetry instance is registered,
            enabled, and shouldTraceConsensus() is true; otherwise a
            null guard.
*/
SpanGuard
SpanGuard::consensusSpan(std::string_view name)
{
    Telemetry* const telemetry = Telemetry::getInstance();
    bool const active =
        telemetry != nullptr && telemetry->isEnabled() && telemetry->shouldTraceConsensus();
    if (active)
    {
        auto state = std::make_unique<Impl>(telemetry->startSpan(name));
        return SpanGuard(std::move(state));
    }
    return {};
}
|
||||
|
||||
/** Start a span gated on shouldTracePeer().

    @param name Span name.
    @return An active guard when a Telemetry instance is registered,
            enabled, and shouldTracePeer() is true; otherwise a null guard.
*/
SpanGuard
SpanGuard::peerSpan(std::string_view name)
{
    Telemetry* const telemetry = Telemetry::getInstance();
    bool const active =
        telemetry != nullptr && telemetry->isEnabled() && telemetry->shouldTracePeer();
    if (active)
    {
        auto state = std::make_unique<Impl>(telemetry->startSpan(name));
        return SpanGuard(std::move(state));
    }
    return {};
}
|
||||
|
||||
/** Start a span gated on shouldTraceLedger().

    @param name Span name.
    @return An active guard when a Telemetry instance is registered,
            enabled, and shouldTraceLedger() is true; otherwise a null
            guard.
*/
SpanGuard
SpanGuard::ledgerSpan(std::string_view name)
{
    Telemetry* const telemetry = Telemetry::getInstance();
    bool const active =
        telemetry != nullptr && telemetry->isEnabled() && telemetry->shouldTraceLedger();
    if (active)
    {
        auto state = std::make_unique<Impl>(telemetry->startSpan(name));
        return SpanGuard(std::move(state));
    }
    return {};
}
|
||||
|
||||
// ===== Child / linked span creation ========================================
|
||||
|
||||
/** Create a child span parented to this guard's span.

    The parent context is built from the calling thread's current
    context with this guard's span installed as the active span. This
    keeps the parent correct even when this guard is not the innermost
    active scope on the calling thread (for example when a later guard
    is active, or the guard is used from a different thread). When this
    guard is the innermost scope the result matches using the current
    context directly.

    @param name Span name for the child.
    @return A new guard, or a null guard if this guard is null or no
            enabled Telemetry instance is registered.
*/
SpanGuard
SpanGuard::childSpan(std::string_view name) const
{
    if (!impl_)
        return {};
    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled())
        return {};

    // Keep the other entries of the thread's context, but pin the
    // active-span slot to the span this guard owns.
    auto current = opentelemetry::context::RuntimeContext::GetCurrent();
    auto parentCtx = otel_trace::SetSpan(current, impl_->span);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(name, parentCtx)));
}
|
||||
|
||||
/** Create a child span whose parent comes from a captured context.

    @param name      Span name for the child.
    @param parentCtx Context to use as the parent.
    @return A new guard, or a null guard if parentCtx is invalid or no
            enabled Telemetry instance is registered.
*/
SpanGuard
SpanGuard::childSpan(std::string_view name, SpanContext const& parentCtx)
{
    if (parentCtx.isValid())
    {
        Telemetry* const telemetry = Telemetry::getInstance();
        if (telemetry != nullptr && telemetry->isEnabled())
        {
            auto state = std::make_unique<Impl>(telemetry->startSpan(name, parentCtx.impl_->ctx));
            return SpanGuard(std::move(state));
        }
    }
    return {};
}
|
||||
|
||||
/** Create a span that is linked to, but not a child of, this guard's span.

    The start options carry an explicit parent context flagged with
    kIsRootSpanKey and holding no span. Without that, the tracer would
    fall back to the calling thread's active span as parent, which is
    normally this very guard's span and would make the result a child.

    NOTE(review): relies on the SDK honouring kIsRootSpanKey on an
    explicit parent context; confirm against the OpenTelemetry version
    this project builds with.

    @param name Span name for the linked span.
    @return A new guard, or a null guard if this guard is null or no
            enabled Telemetry instance is registered.
*/
SpanGuard
SpanGuard::linkedSpan(std::string_view name) const
{
    if (!impl_)
        return {};
    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled())
        return {};

    auto tracer = tel->getTracer("xrpld");
    auto spanCtx = impl_->span->GetContext();

    otel_trace::StartSpanOptions opts;
    // Detach from the thread's active span: the relationship to this
    // guard's span is expressed only through the link below.
    opts.parent = opentelemetry::context::Context{}.SetValue(otel_trace::kIsRootSpanKey, true);
    return SpanGuard(
        std::make_unique<Impl>(tracer->StartSpan(
            std::string(name), {}, {{spanCtx, {{"xrpl.link.type", "follows_from"}}}}, opts)));
}
|
||||
|
||||
/** Create a span carrying a "follows_from" link to a captured context.

    @param name    Span name for the linked span.
    @param linkCtx Context whose span becomes the link target.
    @return A new guard, or a null guard if linkCtx is invalid, no
            enabled Telemetry instance is registered, or the context
            does not hold a span with a valid span context.
*/
SpanGuard
SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
{
    if (!linkCtx.isValid())
        return {};

    Telemetry* const telemetry = Telemetry::getInstance();
    bool const usable = telemetry != nullptr && telemetry->isEnabled();
    if (!usable)
        return {};

    auto tracer = telemetry->getTracer("xrpld");

    // The link needs a span context, so pull the span out of the
    // captured context and bail out if it has no valid identity.
    auto linkSource = otel_trace::GetSpan(linkCtx.impl_->ctx);
    if (!linkSource || !linkSource->GetContext().IsValid())
        return {};

    otel_trace::StartSpanOptions opts;
    auto started = tracer->StartSpan(
        std::string(name),
        {},
        {{linkSource->GetContext(), {{"xrpl.link.type", "follows_from"}}}},
        opts);
    return SpanGuard(std::make_unique<Impl>(std::move(started)));
}
|
||||
|
||||
// ===== Context capture =====================================================
|
||||
|
||||
/** Snapshot the calling thread's current context.

    @return A SpanContext wrapping RuntimeContext::GetCurrent(), or a
            default-constructed one when this guard is null.
*/
SpanContext
SpanGuard::captureContext() const
{
    if (impl_)
    {
        auto snapshot = std::make_shared<SpanContext::Impl>(
            opentelemetry::context::RuntimeContext::GetCurrent());
        return SpanContext(std::move(snapshot));
    }
    return {};
}
|
||||
|
||||
// ===== Attribute setters ===================================================
|
||||
|
||||
void
|
||||
SpanGuard::setAttribute(std::string_view key, std::string_view value)
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->SetAttribute(
|
||||
opentelemetry::nostd::string_view(key.data(), key.size()),
|
||||
opentelemetry::nostd::string_view(value.data(), value.size()));
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::setAttribute(std::string_view key, char const* value)
|
||||
{
|
||||
setAttribute(key, std::string_view(value));
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::setAttribute(std::string_view key, std::int64_t value)
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->SetAttribute(opentelemetry::nostd::string_view(key.data(), key.size()), value);
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::setAttribute(std::string_view key, double value)
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->SetAttribute(opentelemetry::nostd::string_view(key.data(), key.size()), value);
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::setAttribute(std::string_view key, bool value)
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->SetAttribute(opentelemetry::nostd::string_view(key.data(), key.size()), value);
|
||||
}
|
||||
|
||||
// ===== Status / events =====================================================
|
||||
|
||||
void
|
||||
SpanGuard::setOk()
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->SetStatus(otel_trace::StatusCode::kOk);
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::setError(std::string_view description)
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->SetStatus(otel_trace::StatusCode::kError, std::string(description));
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::addEvent(std::string_view name)
|
||||
{
|
||||
if (impl_)
|
||||
impl_->span->AddEvent(std::string(name));
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::recordException(std::exception const& e)
|
||||
{
|
||||
if (!impl_)
|
||||
return;
|
||||
impl_->span->AddEvent(
|
||||
"exception",
|
||||
{{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}});
|
||||
impl_->span->SetStatus(otel_trace::StatusCode::kError, e.what());
|
||||
}
|
||||
|
||||
void
|
||||
SpanGuard::discard()
|
||||
{
|
||||
if (impl_)
|
||||
{
|
||||
tl_discardCurrentSpan = true;
|
||||
impl_->span->End();
|
||||
impl_->span = nullptr; // prevent ~Impl from calling End() again
|
||||
impl_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
@@ -146,11 +146,13 @@ public:
|
||||
// Registers this object as the global Telemetry instance.
void
|
||||
start() override
|
||||
{
|
||||
Telemetry::setInstance(this);
|
||||
}
|
||||
|
||||
// Clears the global Telemetry instance pointer.
void
|
||||
stop() override
|
||||
{
|
||||
Telemetry::setInstance(nullptr);
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -292,6 +294,10 @@ public:
|
||||
trace_api::Provider::SetTracerProvider(
|
||||
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(sdkProvider_));
|
||||
|
||||
// Register as the global Telemetry instance so SpanGuard factory
|
||||
// methods can access it without callers passing a reference.
|
||||
Telemetry::setInstance(this);
|
||||
|
||||
JLOG(journal_.info()) << "Telemetry started successfully";
|
||||
}
|
||||
|
||||
@@ -299,10 +305,15 @@ public:
|
||||
stop() override
|
||||
{
|
||||
JLOG(journal_.info()) << "Telemetry stopping";
|
||||
|
||||
// Unregister global instance before tearing down the pipeline.
|
||||
Telemetry::setInstance(nullptr);
|
||||
|
||||
if (sdkProvider_)
|
||||
{
|
||||
// Force flush before shutdown
|
||||
sdkProvider_->ForceFlush();
|
||||
// Force flush with timeout to avoid blocking indefinitely
|
||||
// when the OTLP endpoint is unreachable.
|
||||
sdkProvider_->ForceFlush(std::chrono::milliseconds(5000));
|
||||
sdkProvider_.reset();
|
||||
trace_api::Provider::SetTracerProvider(
|
||||
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(
|
||||
|
||||
Reference in New Issue
Block a user