From 0de5a776054a99b0fb87b115d44755337be2b217 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:43:58 +0100 Subject: [PATCH] feat(telemetry): add FilteringSpanProcessor and SpanGuard::discard() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add span discard mechanism that drops unwanted spans before they enter the batch export queue, saving both network bandwidth and storage. FilteringSpanProcessor is a custom SpanProcessor decorator that wraps BatchSpanProcessor. SpanGuard::discard() sets a thread-local flag (tl_discardCurrentSpan) before calling Span::End(). The OTel SDK calls OnEnd() synchronously on the same thread, where the flag is checked and cleared to drop the span. New file: DiscardFlag.h — zero-dependency header for the thread-local flag, avoiding transitive include bloat from Telemetry.h. Co-Authored-By: Claude Opus 4.6 --- docs/build/telemetry.md | 48 ++++++++++---- include/xrpl/telemetry/DiscardFlag.h | 27 ++++++++ include/xrpl/telemetry/SpanGuard.h | 35 ++++++++++ src/libxrpl/telemetry/Telemetry.cpp | 98 +++++++++++++++++++++++++++- 4 files changed, 194 insertions(+), 14 deletions(-) create mode 100644 include/xrpl/telemetry/DiscardFlag.h diff --git a/docs/build/telemetry.md b/docs/build/telemetry.md index f3e571fa16..5d4ebbf0e3 100644 --- a/docs/build/telemetry.md +++ b/docs/build/telemetry.md @@ -251,18 +251,42 @@ The Conan package provides a single umbrella target ### Key files -| File | Purpose | -| ---------------------------------------------- | ----------------------------------------------------------- | -| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct | -| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard (activates scope, ends span on destruction) | -| `src/libxrpl/telemetry/Telemetry.cpp` | OTel-backed implementation (`TelemetryImpl`) | -| 
`src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) | -| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) | -| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) | -| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation | -| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation | -| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Tempo + Grafana) | -| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration | +| File | Purpose | +| ---------------------------------------------- | ------------------------------------------------------------ | +| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct | +| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard with `discard()` for dropping unwanted spans | +| `include/xrpl/telemetry/DiscardFlag.h` | Thread-local discard flag (zero-dependency header) | +| `src/libxrpl/telemetry/Telemetry.cpp` | OTel SDK setup, `FilteringSpanProcessor`, provider lifecycle | +| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) | +| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) | +| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) | +| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation | +| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation | +| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Tempo + Grafana) | +| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration | + +### Span discard mechanism + +`SpanGuard::discard()` allows callers to silently drop spans that turn out to be +uninteresting (e.g., failed preflight transactions). 
This saves both network bandwidth +and storage by preventing the span from being exported. + +The mechanism uses a thread-local flag (`tl_discardCurrentSpan` in `DiscardFlag.h`) as a +side-channel to the `FilteringSpanProcessor` (in `Telemetry.cpp`): + +1. `SpanGuard::discard()` sets the thread-local flag and calls `Span::End()` +2. The OTel SDK calls `FilteringSpanProcessor::OnEnd()` synchronously on the same thread +3. The processor checks the flag, clears it, and drops the span before it enters the batch queue + +```cpp +SpanGuard guard(telemetry.startSpan("tx.process")); +auto result = preflight(tx); +if (result != tesSUCCESS) +{ + guard.discard(); // span is dropped, never exported + return result; +} +``` ### Conditional compilation diff --git a/include/xrpl/telemetry/DiscardFlag.h b/include/xrpl/telemetry/DiscardFlag.h new file mode 100644 index 0000000000..991b9b8f6a --- /dev/null +++ b/include/xrpl/telemetry/DiscardFlag.h @@ -0,0 +1,27 @@ +#pragma once + +/** Thread-local flag for span discard signaling. + + SpanGuard::discard() sets tl_discardCurrentSpan to true before calling + Span::End(). The OTel SDK calls SpanProcessor::OnEnd() synchronously on + the same thread, so FilteringSpanProcessor checks and clears this flag + in OnEnd() to drop the span before it enters the batch export queue. + + This side-channel avoids inspecting the Recordable's internals (which + vary by exporter type — SpanData vs OtlpRecordable). + + Kept in a separate header to avoid transitive include bloat: SpanGuard.h + only needs this flag, not the full Telemetry.h with BasicConfig/Journal. + + @see SpanGuard::discard(), FilteringSpanProcessor (Telemetry.cpp) +*/ + +namespace xrpl { +namespace telemetry { + +/** When true, the FilteringSpanProcessor drops the current span in + OnEnd(). Set by SpanGuard::discard(), cleared by OnEnd(). 
*/
+inline thread_local bool tl_discardCurrentSpan = false;
+
+}  // namespace telemetry
+}  // namespace xrpl
diff --git a/include/xrpl/telemetry/SpanGuard.h b/include/xrpl/telemetry/SpanGuard.h
index 39ea99ff7a..acc422f862 100644
--- a/include/xrpl/telemetry/SpanGuard.h
+++ b/include/xrpl/telemetry/SpanGuard.h
@@ -69,6 +69,8 @@
 
 #ifdef XRPL_ENABLE_TELEMETRY
 
+#include <xrpl/telemetry/DiscardFlag.h>
+
 #include
 #include
 #include
@@ -202,6 +204,46 @@ public:
     {
         return opentelemetry::context::RuntimeContext::GetCurrent();
     }
+
+    /** Mark this span for discard and end it immediately.
+
+        Sets the tl_discardCurrentSpan thread-local flag before calling
+        End(). The OTel SDK calls FilteringSpanProcessor::OnEnd()
+        synchronously on the same thread, where the flag is checked and
+        cleared. The span is dropped before entering the batch export
+        queue — never sent over the network or stored.
+
+        The flag is always reset after End() returns: a non-recording
+        span (e.g. one that was not sampled) never reaches OnEnd(), and a
+        stale flag would drop the next span ended on this thread.
+
+        After calling discard(), the guard is inert — the destructor will
+        not call End() again.
+
+        Typical usage:
+        @code
+        SpanGuard guard(telemetry.startSpan("tx.process"));
+        auto result = preflight(tx);
+        if (result != tesSUCCESS)
+        {
+            guard.discard();
+            return result;
+        }
+        @endcode
+    */
+    void
+    discard()
+    {
+        if (span_)
+        {
+            tl_discardCurrentSpan = true;
+            span_->End();
+            // OnEnd() is skipped for non-recording spans; never leave the
+            // flag armed for an unrelated span on this thread.
+            tl_discardCurrentSpan = false;
+            span_ = nullptr;
+        }
+    }
 };
 
 }  // namespace telemetry
diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp
index 1b9f2159b4..071d4d2c01 100644
--- a/src/libxrpl/telemetry/Telemetry.cpp
+++ b/src/libxrpl/telemetry/Telemetry.cpp
@@ -3,8 +3,11 @@
     Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake telemetry=ON).
 
     Contains:
+    - FilteringSpanProcessor: decorator that drops spans flagged via
+      SpanGuard::discard() before they enter the batch export queue.
     - TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter,
-      batch span processor, trace-ID-ratio sampler, and resource attributes.
+      FilteringSpanProcessor wrapping a batch span processor,
+      trace-ID-ratio sampler, and resource attributes.
- NullTelemetryOtel: no-op fallback used when telemetry is compiled in but disabled at runtime (enabled=0 in config). - make_Telemetry(): factory that selects the appropriate implementation. @@ -13,6 +16,7 @@ #ifdef XRPL_ENABLE_TELEMETRY #include +#include #include #include @@ -20,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +42,91 @@ namespace trace_sdk = opentelemetry::sdk::trace; namespace otlp_http = opentelemetry::exporter::otlp; namespace resource = opentelemetry::sdk::resource; +/** SpanProcessor decorator that drops discarded spans. + + Wraps a delegate processor (typically BatchSpanProcessor). In OnEnd(), + checks the tl_discardCurrentSpan thread-local flag. If set (by + SpanGuard::discard()), the span is silently dropped — never entering + the batch queue, never sent over the network, never stored. + + Uses a thread-local flag rather than inspecting Recordable attributes + because the Recordable type varies by exporter (SpanData for simple + exporters, OtlpRecordable for OTLP) and none expose a uniform getter. + The flag is safe because Span::End() calls OnEnd() synchronously on + the same thread. + + All other methods delegate directly to the wrapped processor. + + Dependency diagram: + + +---------------------------+ + | FilteringSpanProcessor | + +---------------------------+ + | - delegate_ : unique_ptr | + | | + +---------------------------+ + | wraps + +---------+-----------+ + | BatchSpanProcessor | + +---------------------+ + + @note Thread safety: OnEnd() may be called concurrently from multiple + threads. The tl_discardCurrentSpan flag is thread-local, so each + thread's discard state is independent — no synchronization needed. 
+*/
+class FilteringSpanProcessor : public trace_sdk::SpanProcessor
+{
+    std::unique_ptr<trace_sdk::SpanProcessor> delegate_;
+
+public:
+    explicit FilteringSpanProcessor(std::unique_ptr<trace_sdk::SpanProcessor> delegate)
+        : delegate_(std::move(delegate))
+    {
+    }
+
+    std::unique_ptr<trace_sdk::Recordable>
+    MakeRecordable() noexcept override
+    {
+        return delegate_->MakeRecordable();
+    }
+
+    void
+    OnStart(
+        trace_sdk::Recordable& span,
+        opentelemetry::trace::SpanContext const& parentContext) noexcept override
+    {
+        delegate_->OnStart(span, parentContext);
+    }
+
+    void
+    OnEnd(std::unique_ptr<trace_sdk::Recordable>&& span) noexcept override
+    {
+        if (tl_discardCurrentSpan)
+        {
+            // SpanGuard::discard() set the flag on this thread just before
+            // calling Span::End(), which invokes OnEnd() synchronously.
+            // Clear the flag and drop the span.
+            tl_discardCurrentSpan = false;
+            return;
+        }
+        delegate_->OnEnd(std::move(span));
+    }
+
+    bool
+    ForceFlush(
+        std::chrono::microseconds timeout = (std::chrono::microseconds::max)()) noexcept override
+    {
+        return delegate_->ForceFlush(timeout);
+    }
+
+    bool
+    Shutdown(
+        std::chrono::microseconds timeout = (std::chrono::microseconds::max)()) noexcept override
+    {
+        return delegate_->Shutdown(timeout);
+    }
+};
+
 /** No-op implementation used when XRPL_ENABLE_TELEMETRY is defined but
     setup.enabled is false at runtime.
 
@@ -175,9 +265,13 @@ public:
         processorOpts.schedule_delay_millis = std::chrono::milliseconds(setup_.batchDelay);
         processorOpts.max_export_batch_size = setup_.batchSize;
 
-        auto processor =
+        auto batchProcessor =
             trace_sdk::BatchSpanProcessorFactory::Create(std::move(exporter), processorOpts);
 
+        // Wrap the batch processor in a FilteringSpanProcessor so spans
+        // ended through SpanGuard::discard() never reach the export queue.
+        auto processor = std::make_unique<FilteringSpanProcessor>(std::move(batchProcessor));
+
         // Configure resource attributes
         auto resourceAttrs = resource::Resource::Create({
             {resource::SemanticConventions::kServiceName, setup_.serviceName},