mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-04 01:06:48 +00:00
feat(telemetry): add FilteringSpanProcessor and SpanGuard::discard()
Add span discard mechanism that drops unwanted spans before they enter the batch export queue, saving both network bandwidth and storage. FilteringSpanProcessor is a custom SpanProcessor decorator that wraps BatchSpanProcessor. SpanGuard::discard() sets a thread-local flag (tl_discardCurrentSpan) before calling Span::End(). The OTel SDK calls OnEnd() synchronously on the same thread, where the flag is checked and cleared to drop the span. New file: DiscardFlag.h — zero-dependency header for the thread-local flag, avoiding transitive include bloat from Telemetry.h. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
48
docs/build/telemetry.md
vendored
48
docs/build/telemetry.md
vendored
@@ -251,18 +251,42 @@ The Conan package provides a single umbrella target
|
||||
|
||||
### Key files
|
||||
|
||||
| File | Purpose |
|
||||
| ---------------------------------------------- | ----------------------------------------------------------- |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard (activates scope, ends span on destruction) |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | OTel-backed implementation (`TelemetryImpl`) |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) |
|
||||
| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) |
|
||||
| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation |
|
||||
| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation |
|
||||
| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Tempo + Grafana) |
|
||||
| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration |
|
||||
| File | Purpose |
|
||||
| ---------------------------------------------- | ------------------------------------------------------------ |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard with `discard()` for dropping unwanted spans |
|
||||
| `include/xrpl/telemetry/DiscardFlag.h` | Thread-local discard flag (zero-dependency header) |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | OTel SDK setup, `FilteringSpanProcessor`, provider lifecycle |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) |
|
||||
| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) |
|
||||
| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation |
|
||||
| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation |
|
||||
| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Tempo + Grafana) |
|
||||
| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration |
|
||||
|
||||
### Span discard mechanism
|
||||
|
||||
`SpanGuard::discard()` allows callers to silently drop spans that turn out to be
|
||||
uninteresting (e.g., failed preflight transactions). This saves both network bandwidth
|
||||
and storage by preventing the span from being exported.
|
||||
|
||||
The mechanism uses a thread-local flag (`tl_discardCurrentSpan` in `DiscardFlag.h`) as a
|
||||
side-channel to the `FilteringSpanProcessor` (in `Telemetry.cpp`):
|
||||
|
||||
1. `SpanGuard::discard()` sets the thread-local flag and calls `Span::End()`
|
||||
2. The OTel SDK calls `FilteringSpanProcessor::OnEnd()` synchronously on the same thread
|
||||
3. The processor checks the flag, clears it, and drops the span before it enters the batch queue
|
||||
|
||||
```cpp
|
||||
SpanGuard guard(telemetry.startSpan("tx.process"));
|
||||
auto result = preflight(tx);
|
||||
if (result != tesSUCCESS)
|
||||
{
|
||||
guard.discard(); // span is dropped, never exported
|
||||
return result;
|
||||
}
|
||||
```
|
||||
|
||||
### Conditional compilation
|
||||
|
||||
|
||||
27
include/xrpl/telemetry/DiscardFlag.h
Normal file
27
include/xrpl/telemetry/DiscardFlag.h
Normal file
@@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
/** Thread-local flag for span discard signaling.
|
||||
|
||||
SpanGuard::discard() sets tl_discardCurrentSpan to true before calling
|
||||
Span::End(). The OTel SDK calls SpanProcessor::OnEnd() synchronously on
|
||||
the same thread, so FilteringSpanProcessor checks and clears this flag
|
||||
in OnEnd() to drop the span before it enters the batch export queue.
|
||||
|
||||
This side-channel avoids inspecting the Recordable's internals (which
|
||||
vary by exporter type — SpanData vs OtlpRecordable).
|
||||
|
||||
Kept in a separate header to avoid transitive include bloat: SpanGuard.h
|
||||
only needs this flag, not the full Telemetry.h with BasicConfig/Journal.
|
||||
|
||||
@see SpanGuard::discard(), FilteringSpanProcessor (Telemetry.cpp)
|
||||
*/
|
||||
|
||||
namespace xrpl::telemetry {

/** Per-thread "drop the span that is ending" signal.

    SpanGuard::discard() raises this flag immediately before it ends its
    span; FilteringSpanProcessor::OnEnd() reads it, lowers it again, and
    skips forwarding the span when it was raised. Being thread_local,
    every thread has its own independent copy, which starts out false.
*/
inline thread_local bool tl_discardCurrentSpan{false};

}  // namespace xrpl::telemetry
|
||||
@@ -69,6 +69,8 @@
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/telemetry/DiscardFlag.h>
|
||||
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
@@ -202,6 +204,39 @@ public:
|
||||
{
|
||||
return opentelemetry::context::RuntimeContext::GetCurrent();
|
||||
}
|
||||
|
||||
/** Mark this span for discard and end it immediately.
|
||||
|
||||
Sets the tl_discardCurrentSpan thread-local flag before calling
|
||||
End(). The OTel SDK calls FilteringSpanProcessor::OnEnd()
|
||||
synchronously on the same thread, where the flag is checked and
|
||||
cleared. The span is dropped before entering the batch export
|
||||
queue — never sent over the network or stored.
|
||||
|
||||
After calling discard(), the guard is inert — the destructor will
|
||||
not call End() again.
|
||||
|
||||
Typical usage:
|
||||
@code
|
||||
SpanGuard guard(telemetry.startSpan("tx.process"));
|
||||
auto result = preflight(tx);
|
||||
if (result != tesSUCCESS)
|
||||
{
|
||||
guard.discard();
|
||||
return result;
|
||||
}
|
||||
@endcode
|
||||
*/
|
||||
void
|
||||
discard()
|
||||
{
|
||||
if (span_)
|
||||
{
|
||||
tl_discardCurrentSpan = true;
|
||||
span_->End();
|
||||
span_ = nullptr;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace telemetry
|
||||
|
||||
@@ -3,8 +3,11 @@
|
||||
Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake
|
||||
telemetry=ON). Contains:
|
||||
|
||||
- FilteringSpanProcessor: decorator that drops spans marked with
|
||||
the tl_discardCurrentSpan thread-local flag before they enter the batch export queue.
|
||||
- TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter,
|
||||
batch span processor, trace-ID-ratio sampler, and resource attributes.
|
||||
FilteringSpanProcessor wrapping a batch span processor,
|
||||
trace-ID-ratio sampler, and resource attributes.
|
||||
- NullTelemetryOtel: no-op fallback used when telemetry is compiled in
|
||||
but disabled at runtime (enabled=0 in config).
|
||||
- make_Telemetry(): factory that selects the appropriate implementation.
|
||||
@@ -13,6 +16,7 @@
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/telemetry/DiscardFlag.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <opentelemetry/exporters/otlp/otlp_http_exporter_factory.h>
|
||||
@@ -20,6 +24,7 @@
|
||||
#include <opentelemetry/sdk/resource/semantic_conventions.h>
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_factory.h>
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_options.h>
|
||||
#include <opentelemetry/sdk/trace/processor.h>
|
||||
#include <opentelemetry/sdk/trace/sampler.h>
|
||||
#include <opentelemetry/sdk/trace/samplers/trace_id_ratio.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider.h>
|
||||
@@ -37,6 +42,91 @@ namespace trace_sdk = opentelemetry::sdk::trace;
|
||||
namespace otlp_http = opentelemetry::exporter::otlp;
|
||||
namespace resource = opentelemetry::sdk::resource;
|
||||
|
||||
/** SpanProcessor decorator that drops discarded spans.
|
||||
|
||||
Wraps a delegate processor (typically BatchSpanProcessor). In OnEnd(),
|
||||
checks the tl_discardCurrentSpan thread-local flag. If set (by
|
||||
SpanGuard::discard()), the span is silently dropped — never entering
|
||||
the batch queue, never sent over the network, never stored.
|
||||
|
||||
Uses a thread-local flag rather than inspecting Recordable attributes
|
||||
because the Recordable type varies by exporter (SpanData for simple
|
||||
exporters, OtlpRecordable for OTLP) and none expose a uniform getter.
|
||||
The flag is safe because Span::End() calls OnEnd() synchronously on
|
||||
the same thread.
|
||||
|
||||
All other methods delegate directly to the wrapped processor.
|
||||
|
||||
Dependency diagram:
|
||||
|
||||
+---------------------------+
|
||||
| FilteringSpanProcessor |
|
||||
+---------------------------+
|
||||
| - delegate_ : unique_ptr |
|
||||
| <SpanProcessor> |
|
||||
+---------------------------+
|
||||
| wraps
|
||||
+---------+-----------+
|
||||
| BatchSpanProcessor |
|
||||
+---------------------+
|
||||
|
||||
@note Thread safety: OnEnd() may be called concurrently from multiple
|
||||
threads. The tl_discardCurrentSpan flag is thread-local, so each
|
||||
thread's discard state is independent — no synchronization needed.
|
||||
*/
|
||||
class FilteringSpanProcessor : public trace_sdk::SpanProcessor
|
||||
{
|
||||
std::unique_ptr<trace_sdk::SpanProcessor> delegate_;
|
||||
|
||||
public:
|
||||
explicit FilteringSpanProcessor(std::unique_ptr<trace_sdk::SpanProcessor> delegate)
|
||||
: delegate_(std::move(delegate))
|
||||
{
|
||||
}
|
||||
|
||||
std::unique_ptr<trace_sdk::Recordable>
|
||||
MakeRecordable() noexcept override
|
||||
{
|
||||
return delegate_->MakeRecordable();
|
||||
}
|
||||
|
||||
void
|
||||
OnStart(
|
||||
trace_sdk::Recordable& span,
|
||||
opentelemetry::trace::SpanContext const& parentContext) noexcept override
|
||||
{
|
||||
delegate_->OnStart(span, parentContext);
|
||||
}
|
||||
|
||||
void
|
||||
OnEnd(std::unique_ptr<trace_sdk::Recordable>&& span) noexcept override
|
||||
{
|
||||
if (tl_discardCurrentSpan)
|
||||
{
|
||||
// SpanGuard::discard() set the flag on this thread just before
|
||||
// calling Span::End(), which invokes OnEnd() synchronously.
|
||||
// Clear the flag and drop the span.
|
||||
tl_discardCurrentSpan = false;
|
||||
return;
|
||||
}
|
||||
delegate_->OnEnd(std::move(span));
|
||||
}
|
||||
|
||||
bool
|
||||
ForceFlush(
|
||||
std::chrono::microseconds timeout = (std::chrono::microseconds::max)()) noexcept override
|
||||
{
|
||||
return delegate_->ForceFlush(timeout);
|
||||
}
|
||||
|
||||
bool
|
||||
Shutdown(
|
||||
std::chrono::microseconds timeout = (std::chrono::microseconds::max)()) noexcept override
|
||||
{
|
||||
return delegate_->Shutdown(timeout);
|
||||
}
|
||||
};
|
||||
|
||||
/** No-op implementation used when XRPL_ENABLE_TELEMETRY is defined but
|
||||
setup.enabled is false at runtime.
|
||||
|
||||
@@ -175,9 +265,13 @@ public:
|
||||
processorOpts.schedule_delay_millis = std::chrono::milliseconds(setup_.batchDelay);
|
||||
processorOpts.max_export_batch_size = setup_.batchSize;
|
||||
|
||||
auto processor =
|
||||
auto batchProcessor =
|
||||
trace_sdk::BatchSpanProcessorFactory::Create(std::move(exporter), processorOpts);
|
||||
|
||||
// Wrap batch processor with filtering processor that drops spans
|
||||
// flagged through tl_discardCurrentSpan (set by SpanGuard::discard()).
|
||||
auto processor = std::make_unique<FilteringSpanProcessor>(std::move(batchProcessor));
|
||||
|
||||
// Configure resource attributes
|
||||
auto resourceAttrs = resource::Resource::Create({
|
||||
{resource::SemanticConventions::kServiceName, setup_.serviceName},
|
||||
|
||||
Reference in New Issue
Block a user