Merge branch 'pratik/otel-phase1b-telemetry-infra' into pratik/otel-phase1c-rpc-integration

Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
This commit is contained in:
Pratik Mankawde
2026-05-29 14:51:12 +01:00
10 changed files with 68 additions and 74 deletions

View File

@@ -10,16 +10,13 @@
"rocksdb/10.5.1#4a197eca381a3e5ae8adf8cffa5aacd0%1765850186.86",
"re2/20251105#8579cfd0bda4daf0683f9e3898f964b4%1774398111.888",
"protobuf/6.33.5#d96d52ba5baaaa532f47bda866ad87a5%1774467363.12",
"opentelemetry-cpp/1.18.0#efd9851e173f8a13b9c7d35232de8cf1%1750409186.472",
"openssl/3.6.1#e6399de266349245a4542fc5f6c71552%1774458290.139",
"nudb/2.0.9#11149c73f8f2baff9a0198fe25971fc7%1774883011.384",
"nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1701220705.259",
"openssl/3.6.1#4789bbf131b77d0515d15e094c8f697f%1778061682.38",
"nudb/2.0.9#11149c73f8f2baff9a0198fe25971fc7%1775040983.408",
"lz4/1.10.0#59fc63cac7f10fbe8e05c7e62c2f3504%1765850143.914",
"libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1765842973.492",
"libcurl/8.18.0#364bc3755cb9ef84ed9a7ae9c7efc1c1%1770984390.024",
"libbacktrace/cci.20210118#a7691bfccd8caaf66309df196790a5a1%1765842973.03",
"libarchive/3.8.1#ffee18995c706e02bf96e7a2f7042e0d%1765850144.736",
"jemalloc/5.3.0#e951da9cf599e956cebc117880d2d9f8%1729241615.244",
"jemalloc/5.3.0#c671e612af76700db5957c9857978a1c%1776700030.961",
"gtest/1.17.0#5224b3b3ff3b4ce1133cbdd27d53ee7d%1768312129.152",
"grpc/1.78.1#b1a9e74b145cc471bed4dc64dc6eb2c1%1774467387.342",
"ed25519/2015.03#ae761bdc52730a843f0809bdf6c1b1f6%1765850143.772",
@@ -33,15 +30,9 @@
"zlib/1.3.1#cac0f6daea041b0ccf42934163defb20%1774439233.809",
"strawberryperl/5.32.1.1#8d114504d172cfea8ea1662d09b6333e%1774447376.964",
"protobuf/6.33.5#d96d52ba5baaaa532f47bda866ad87a5%1774467363.12",
"pkgconf/2.5.1#93c2051284cba1279494a43a4fcfeae2%1757684701.089",
"opentelemetry-proto/1.4.0#4096a3b05916675ef9628f3ffd571f51%1732731336.11",
"ninja/1.13.2#c8c5dc2a52ed6e4e42a66d75b4717ceb%1764096931.974",
"nasm/2.16.01#31e26f2ee3c4346ecd347911bd126904%1765850144.707",
"msys2/cci.latest#d22fe7b2808f5fd34d0a7923ace9c54f%1770657326.649",
"meson/1.10.0#60786758ea978964c24525de19603cf4%1768294926.103",
"m4/1.4.19#5d7a4994e5875d76faf7acf3ed056036%1774365463.87",
"libtool/2.4.7#14e7739cc128bc1623d2ed318008e47e%1755679003.847",
"gnu-config/cci.20210814#466e9d4d7779e1c142443f7ea44b4284%1762363589.329",
"m4/1.4.19#4523e4347b55cd26ae918bd5770cab9a%1778062762.471",
"cmake/4.3.0#b939a42e98f593fb34d3a8c5cc860359%1774439249.183",
"b2/5.4.2#ffd6084a119587e70f11cd45d1a386e2%1774439233.447",
"automake/1.16.5#b91b7c384c3deaa9d535be02da14d04f%1755524470.56",

View File

@@ -150,7 +150,7 @@ class Xrpl(ConanFile):
# OpenTelemetry C++ SDK for distributed tracing (optional).
# Provides OTLP/HTTP exporter, batch span processor, and trace API.
if self.options.telemetry:
self.requires("opentelemetry-cpp/1.18.0")
self.requires("opentelemetry-cpp/1.26.0")
self.requires("xxhash/0.8.3", transitive_headers=True)
exports_sources = (

View File

@@ -64,7 +64,7 @@ cd .build
#### Install dependencies
The `telemetry` option adds `opentelemetry-cpp/1.18.0` as a dependency.
The `telemetry` option adds `opentelemetry-cpp/1.26.0` as a dependency.
If the Conan lockfile does not yet include this package, bypass it with `--lockfile=""`.
```bash
@@ -236,7 +236,7 @@ curl -s -X POST http://127.0.0.1:5005/ \
### Conan lockfile error
If you see `ERROR: Requirement 'opentelemetry-cpp/1.18.0' not in lockfile 'requires'`,
If you see `ERROR: Requirement 'opentelemetry-cpp/1.26.0' not in lockfile 'requires'`,
the lockfile was generated without the telemetry dependency.
Pass `--lockfile=""` to bypass the lockfile, or regenerate it with telemetry enabled.

View File

@@ -2,7 +2,7 @@
/** Thread-local flag for span discard signaling.
SpanGuard::discard() sets tl_discardCurrentSpan to true before calling
SpanGuard::discard() sets gTlDiscardCurrentSpan to true before calling
Span::End(). The OTel SDK calls SpanProcessor::OnEnd() synchronously on
the same thread, so FilteringSpanProcessor checks and clears this flag
in OnEnd() to drop the span before it enters the batch export queue.
@@ -16,12 +16,10 @@
@see SpanGuard::discard(), FilteringSpanProcessor (Telemetry.cpp)
*/
namespace xrpl {
namespace telemetry {
namespace xrpl::telemetry {
/** When true, the FilteringSpanProcessor drops the current span in
OnEnd(). Set by SpanGuard::discard(), cleared by OnEnd(). */
inline thread_local bool tl_discardCurrentSpan = false;
inline thread_local bool gTlDiscardCurrentSpan = false;
} // namespace telemetry
} // namespace xrpl
} // namespace xrpl::telemetry

View File

@@ -4,7 +4,7 @@
Provides the Telemetry base class that all components use to create trace
spans. Three concrete implementations exist, selected at construction time
by make_Telemetry():
by makeTelemetry():
- TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled
only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime.
@@ -111,7 +111,7 @@ class Telemetry
the initialization thread, factory methods load on worker threads.
@see setInstance(), getInstance()
*/
inline static std::atomic<Telemetry*> instance_{nullptr};
inline static std::atomic<Telemetry*> instance{nullptr};
public:
/** Get the global Telemetry instance.
@@ -120,7 +120,7 @@ public:
static Telemetry*
getInstance()
{
return instance_.load(std::memory_order_acquire);
return instance.load(std::memory_order_acquire);
}
/** Set the global Telemetry instance.
@@ -131,7 +131,7 @@ public:
static void
setInstance(Telemetry* t)
{
instance_.store(t, std::memory_order_release);
instance.store(t, std::memory_order_release);
}
/** Configuration parsed from the [telemetry] section of xrpld.cfg.
@@ -313,7 +313,7 @@ public:
@param journal Journal for log output during initialization.
*/
std::unique_ptr<Telemetry>
make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal);
makeTelemetry(Telemetry::Setup const& setup, beast::Journal journal);
/** Parse the [telemetry] config section into a Setup struct.
@@ -325,7 +325,7 @@ make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal);
@return A populated Setup struct with defaults for missing values.
*/
Telemetry::Setup
setup_Telemetry(
setupTelemetry(
Section const& section,
std::string const& nodePublicKey,
std::string const& version,

View File

@@ -1,25 +1,23 @@
/** No-op implementation of the Telemetry interface.
Always compiled (regardless of XRPL_ENABLE_TELEMETRY). Provides the
make_Telemetry() factory when telemetry is compiled out (#ifndef), which
makeTelemetry() factory when telemetry is compiled out (#ifndef), which
unconditionally returns a NullTelemetry that does nothing.
When XRPL_ENABLE_TELEMETRY IS defined, the OTel virtual methods
(getTracer, startSpan) return noop tracers/spans. The make_Telemetry()
(getTracer, startSpan) return noop tracers/spans. The makeTelemetry()
factory in this file is not used in that case -- Telemetry.cpp provides
its own factory that can return the real TelemetryImpl.
*/
#include <xrpl/beast/utility/Journal.h>
#include <xrpl/telemetry/Telemetry.h>
#include <string_view>
#ifdef XRPL_ENABLE_TELEMETRY
#include <opentelemetry/trace/noop.h>
#endif
#include <memory>
#include <utility>
namespace xrpl::telemetry {
namespace {
@@ -122,7 +120,7 @@ public:
*/
#ifndef XRPL_ENABLE_TELEMETRY
std::unique_ptr<Telemetry>
make_Telemetry(Telemetry::Setup const& setup, beast::Journal)
makeTelemetry(Telemetry::Setup const& setup, beast::Journal)
{
return std::make_unique<NullTelemetry>(setup);
}

View File

@@ -35,12 +35,15 @@
#include <opentelemetry/trace/span_startoptions.h>
#include <opentelemetry/trace/tracer.h>
#include <cstdint>
#include <exception>
#include <memory>
#include <string>
#include <string_view>
#include <typeinfo>
#include <utility>
namespace xrpl {
namespace telemetry {
namespace xrpl::telemetry {
namespace otel_trace = opentelemetry::trace;
@@ -164,7 +167,7 @@ SpanGuard
SpanGuard::span(TraceCategory cat, std::string_view prefix, std::string_view name)
{
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled() || !isCategoryEnabled(*tel, cat))
if ((tel == nullptr) || !tel->isEnabled() || !isCategoryEnabled(*tel, cat))
return {};
auto fullName = std::string(prefix) + "." + std::string(name);
return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, categoryToSpanKind(cat))));
@@ -178,7 +181,7 @@ SpanGuard::childSpan(std::string_view name) const
if (!impl_)
return {};
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled())
if ((tel == nullptr) || !tel->isEnabled())
return {};
auto ctx = opentelemetry::context::RuntimeContext::GetCurrent();
return SpanGuard(std::make_unique<Impl>(tel->startSpan(name, ctx)));
@@ -190,7 +193,7 @@ SpanGuard::childSpan(std::string_view name, SpanContext const& parentCtx)
if (!parentCtx.isValid())
return {};
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled())
if ((tel == nullptr) || !tel->isEnabled())
return {};
return SpanGuard(std::make_unique<Impl>(tel->startSpan(name, parentCtx.impl_->ctx)));
}
@@ -201,7 +204,7 @@ SpanGuard::linkedSpan(std::string_view name) const
if (!impl_)
return {};
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled())
if ((tel == nullptr) || !tel->isEnabled())
return {};
auto tracer = tel->getTracer("xrpld");
@@ -228,7 +231,7 @@ SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
if (!linkCtx.isValid())
return {};
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled())
if ((tel == nullptr) || !tel->isEnabled())
return {};
auto tracer = tel->getTracer("xrpld");
@@ -271,9 +274,11 @@ void
SpanGuard::setAttribute(std::string_view key, std::string_view value)
{
if (impl_)
{
impl_->span->SetAttribute(
opentelemetry::nostd::string_view(key.data(), key.size()),
opentelemetry::nostd::string_view(value.data(), value.size()));
}
}
void
@@ -342,14 +347,13 @@ SpanGuard::discard()
{
if (impl_)
{
tl_discardCurrentSpan = true;
gTlDiscardCurrentSpan = true;
impl_->span->End();
impl_->span = nullptr; // prevent ~Impl from calling End() again
impl_.reset();
}
}
} // namespace telemetry
} // namespace xrpl
} // namespace xrpl::telemetry
#endif // XRPL_ENABLE_TELEMETRY

View File

@@ -10,7 +10,7 @@
trace-ID-ratio sampler, and resource attributes.
- NullTelemetryOtel: no-op fallback used when telemetry is compiled in
but disabled at runtime (enabled=0 in config).
- make_Telemetry(): factory that selects the appropriate implementation.
- makeTelemetry(): factory that selects the appropriate implementation.
*/
#ifdef XRPL_ENABLE_TELEMETRY
@@ -34,8 +34,12 @@
#include <opentelemetry/trace/noop.h>
#include <opentelemetry/trace/provider.h>
namespace xrpl {
namespace telemetry {
#include <chrono>
#include <string>
#include <string_view>
#include <utility>
namespace xrpl::telemetry {
namespace {
@@ -47,7 +51,7 @@ namespace resource = opentelemetry::sdk::resource;
/** SpanProcessor decorator that drops discarded spans.
Wraps a delegate processor (typically BatchSpanProcessor). In OnEnd(),
checks the tl_discardCurrentSpan thread-local flag. If set (by
checks the gTlDiscardCurrentSpan thread-local flag. If set (by
SpanGuard::discard()), the span is silently dropped — never entering
the batch queue, never sent over the network, never stored.
@@ -73,12 +77,12 @@ namespace resource = opentelemetry::sdk::resource;
+---------------------+
@note Thread safety: OnEnd() may be called concurrently from multiple
threads. The tl_discardCurrentSpan flag is thread-local, so each
threads. The gTlDiscardCurrentSpan flag is thread-local, so each
thread's discard state is independent — no synchronization needed.
*/
class FilteringSpanProcessor : public trace_sdk::SpanProcessor
{
std::unique_ptr<trace_sdk::SpanProcessor> delegate_;
std::unique_ptr<trace_sdk::SpanProcessor> delegate_{};
public:
explicit FilteringSpanProcessor(std::unique_ptr<trace_sdk::SpanProcessor> delegate)
@@ -103,12 +107,12 @@ public:
void
OnEnd(std::unique_ptr<trace_sdk::Recordable>&& span) noexcept override
{
if (tl_discardCurrentSpan)
if (gTlDiscardCurrentSpan)
{
// SpanGuard::discard() set the flag on this thread just before
// calling Span::End(), which invokes OnEnd() synchronously.
// Clear the flag and drop the span.
tl_discardCurrentSpan = false;
gTlDiscardCurrentSpan = false;
return;
}
delegate_->OnEnd(std::move(span));
@@ -141,7 +145,7 @@ class NullTelemetryOtel : public Telemetry
Setup const setup_;
public:
explicit NullTelemetryOtel(Setup const& setup) : setup_(setup)
explicit NullTelemetryOtel(Setup setup) : setup_(std::move(setup))
{
}
@@ -157,37 +161,37 @@ public:
Telemetry::setInstance(nullptr);
}
bool
[[nodiscard]] bool
isEnabled() const override
{
return false;
}
bool
[[nodiscard]] bool
shouldTraceTransactions() const override
{
return false;
}
bool
[[nodiscard]] bool
shouldTraceConsensus() const override
{
return false;
}
bool
[[nodiscard]] bool
shouldTraceRpc() const override
{
return false;
}
bool
[[nodiscard]] bool
shouldTracePeer() const override
{
return false;
}
bool
[[nodiscard]] bool
shouldTraceLedger() const override
{
return false;
@@ -236,10 +240,10 @@ class TelemetryImpl : public Telemetry
Held as std::shared_ptr so we can call ForceFlush() on shutdown.
Wrapped in a nostd::shared_ptr when registered as the global provider.
*/
std::shared_ptr<trace_sdk::TracerProvider> sdkProvider_;
std::shared_ptr<trace_sdk::TracerProvider> sdkProvider_{};
public:
TelemetryImpl(Setup const& setup, beast::Journal journal) : setup_(setup), journal_(journal)
TelemetryImpl(Setup setup, beast::Journal journal) : setup_(std::move(setup)), journal_(journal)
{
}
@@ -331,37 +335,37 @@ public:
JLOG(journal_.info()) << "Telemetry stopped";
}
bool
[[nodiscard]] bool
isEnabled() const override
{
return true;
}
bool
[[nodiscard]] bool
shouldTraceTransactions() const override
{
return setup_.traceTransactions;
}
bool
[[nodiscard]] bool
shouldTraceConsensus() const override
{
return setup_.traceConsensus;
}
bool
[[nodiscard]] bool
shouldTraceRpc() const override
{
return setup_.traceRpc;
}
bool
[[nodiscard]] bool
shouldTracePeer() const override
{
return setup_.tracePeer;
}
bool
[[nodiscard]] bool
shouldTraceLedger() const override
{
return setup_.traceLedger;
@@ -401,14 +405,13 @@ public:
} // namespace
std::unique_ptr<Telemetry>
make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal)
makeTelemetry(Telemetry::Setup const& setup, beast::Journal journal)
{
if (setup.enabled)
return std::make_unique<TelemetryImpl>(setup, journal);
return std::make_unique<NullTelemetryOtel>(setup);
}
} // namespace telemetry
} // namespace xrpl
} // namespace xrpl::telemetry
#endif // XRPL_ENABLE_TELEMETRY

View File

@@ -41,7 +41,7 @@ networkTypeFromId(std::uint32_t networkId)
} // namespace
Telemetry::Setup
setup_Telemetry(
setupTelemetry(
Section const& section,
std::string const& nodePublicKey,
std::string const& version,

View File

@@ -324,8 +324,8 @@ public:
[this] { signalStop("PerfLog"); }))
, telemetry_(
telemetry::make_Telemetry(
telemetry::setup_Telemetry(
telemetry::makeTelemetry(
telemetry::setupTelemetry(
config_->section("telemetry"),
"", // Updated later via setServiceInstanceId()
BuildInfo::getVersionString(),