mirror of
https://github.com/XRPLF/rippled.git
synced 2026-04-29 15:37:57 +00:00
Instrument the consensus subsystem with OpenTelemetry spans covering
the full round lifecycle: round start, establish phase, proposal send,
ledger close, position updates, consensus check, accept, validation
send, and mode changes.
Key design choices adapted from the original Phase 4 implementation
to the new SpanGuard factory pattern introduced in Phase 3:
- Add SpanGuard::hashSpan() for category-gated hash-derived trace IDs
(consensus round spans share trace_id across validators via ledger hash)
- Add SpanGuard::addEvent() overload with key-value attribute pairs
(used for dispute.resolve events during position updates)
- Add ConsensusSpanNames.h with compile-time span name constants
following the colocated *SpanNames.h pattern from Phase 3
- Add consensusTraceStrategy config option ("deterministic"/"attribute")
for cross-node trace correlation strategy selection
- Use SpanGuard::linkedSpan() for follows-from relationships between
consecutive rounds and cross-thread validation spans
- Use SpanGuard::captureContext() for thread-safe context propagation
from consensus thread to jtACCEPT worker thread
Spans produced: consensus.round, consensus.proposal.send,
consensus.ledger_close, consensus.establish, consensus.update_positions,
consensus.check, consensus.accept, consensus.accept.apply,
consensus.validation.send, consensus.mode_change
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
458 lines
13 KiB
C++
458 lines
13 KiB
C++
/** Pimpl implementation for SpanGuard and SpanContext.

    All OpenTelemetry SDK types are confined to this translation unit.
    The public SpanGuard.h header contains only standard-library types
    and forward-declares the Impl struct.

    Static factory methods access the global Telemetry instance via
    Telemetry::getInstance(), check whether the requested TraceCategory
    is enabled, and return either an active guard with a real Span+Scope
    or a null guard whose methods are all no-ops.

    The Impl struct holds the OTel Span (shared_ptr) and Scope.
    Scope is non-movable, but since Impl lives behind a unique_ptr,
    SpanGuard's move constructor simply transfers the pointer — no
    double-Scope issues.

    @see SpanGuard (SpanGuard.h), Telemetry (Telemetry.h),
         FilteringSpanProcessor (Telemetry.cpp)
*/
|
|
|
|
#ifdef XRPL_ENABLE_TELEMETRY
|
|
|
|
#include <xrpl/basics/random.h>
|
|
#include <xrpl/telemetry/DiscardFlag.h>
|
|
#include <xrpl/telemetry/SpanGuard.h>
|
|
#include <xrpl/telemetry/SpanNames.h>
|
|
#include <xrpl/telemetry/Telemetry.h>
|
|
|
|
#include <opentelemetry/context/runtime_context.h>
|
|
#include <opentelemetry/nostd/shared_ptr.h>
|
|
#include <opentelemetry/trace/context.h>
|
|
#include <opentelemetry/trace/default_span.h>
|
|
#include <opentelemetry/trace/provider.h>
|
|
#include <opentelemetry/trace/scope.h>
|
|
#include <opentelemetry/trace/span.h>
|
|
#include <opentelemetry/trace/span_context.h>
|
|
#include <opentelemetry/trace/span_startoptions.h>
|
|
#include <opentelemetry/trace/trace_flags.h>
|
|
#include <opentelemetry/trace/trace_id.h>
|
|
#include <opentelemetry/trace/tracer.h>
|
|
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace xrpl {
|
|
namespace telemetry {
|
|
|
|
namespace otel_trace = opentelemetry::trace;
|
|
|
|
// ===== SpanContext::Impl ===================================================
|
|
|
|
struct SpanContext::Impl
|
|
{
|
|
opentelemetry::context::Context ctx;
|
|
|
|
explicit Impl(opentelemetry::context::Context c) : ctx(std::move(c))
|
|
{
|
|
}
|
|
};
|
|
|
|
SpanContext::SpanContext(std::shared_ptr<Impl> impl) : impl_(std::move(impl))
|
|
{
|
|
}
|
|
|
|
bool
|
|
SpanContext::isValid() const
|
|
{
|
|
return impl_ != nullptr;
|
|
}
|
|
|
|
// ===== SpanGuard::Impl ====================================================
|
|
|
|
struct SpanGuard::Impl
|
|
{
|
|
/** The OTel span being guarded. Set to nullptr after discard(). */
|
|
opentelemetry::nostd::shared_ptr<otel_trace::Span> span;
|
|
|
|
/** Scope that activates span on the current thread's context stack. */
|
|
otel_trace::Scope scope;
|
|
|
|
explicit Impl(opentelemetry::nostd::shared_ptr<otel_trace::Span> s)
|
|
: span(std::move(s)), scope(span)
|
|
{
|
|
}
|
|
|
|
~Impl()
|
|
{
|
|
if (span)
|
|
span->End();
|
|
}
|
|
|
|
Impl(Impl const&) = delete;
|
|
Impl&
|
|
operator=(Impl const&) = delete;
|
|
Impl(Impl&&) = delete;
|
|
Impl&
|
|
operator=(Impl&&) = delete;
|
|
};
|
|
|
|
// ===== SpanGuard core lifecycle ============================================
|
|
|
|
// Special members are defaulted here, in the translation unit where
// SpanGuard::Impl is a complete type.

/** Default constructor: a guard with no implementation state. */
SpanGuard::SpanGuard() = default;

/** Move constructor: takes over the other guard's implementation pointer. */
SpanGuard::SpanGuard(SpanGuard&&) noexcept = default;

/** Destructor: releases the implementation state, if any. */
SpanGuard::~SpanGuard() = default;
|
|
|
|
SpanGuard::SpanGuard(std::unique_ptr<Impl> impl) : impl_(std::move(impl))
|
|
{
|
|
}
|
|
|
|
/** Report whether this guard holds implementation state.

    @return true if impl_ is non-null, false for a null guard.
*/
SpanGuard::
operator bool() const
{
    return static_cast<bool>(impl_);
}
|
|
|
|
// ===== Static factory methods ==============================================
|
|
|
|
/** Check whether the given TraceCategory is enabled on the Telemetry instance.
|
|
@return true if the category's shouldTrace*() flag is on.
|
|
*/
|
|
static bool
|
|
isCategoryEnabled(Telemetry const& tel, TraceCategory cat)
|
|
{
|
|
switch (cat)
|
|
{
|
|
case TraceCategory::Rpc:
|
|
return tel.shouldTraceRpc();
|
|
case TraceCategory::Transactions:
|
|
return tel.shouldTraceTransactions();
|
|
case TraceCategory::Consensus:
|
|
return tel.shouldTraceConsensus();
|
|
case TraceCategory::Peer:
|
|
return tel.shouldTracePeer();
|
|
case TraceCategory::Ledger:
|
|
return tel.shouldTraceLedger();
|
|
}
|
|
return false; // unreachable, silences compiler warning
|
|
}
|
|
|
|
/** Start a span with the given name, without any category gating.

    @param name Span name forwarded to Telemetry::startSpan().
    @return An active guard, or a null guard when no Telemetry instance
            exists or telemetry is disabled.
*/
SpanGuard
SpanGuard::span(std::string_view name)
{
    if (auto* telemetry = Telemetry::getInstance(); telemetry && telemetry->isEnabled())
        return SpanGuard(std::make_unique<Impl>(telemetry->startSpan(name)));
    return {};
}
|
|
|
|
/** Start a category-gated span named "<prefix>.<name>".

    @param cat    Category that must be enabled for a span to be created.
    @param prefix First part of the span name.
    @param name   Second part of the span name.
    @return An active guard, or a null guard when no Telemetry instance
            exists, telemetry is disabled, or the category is disabled.
*/
SpanGuard
SpanGuard::span(TraceCategory cat, std::string_view prefix, std::string_view name)
{
    auto* telemetry = Telemetry::getInstance();
    bool const tracing = telemetry && telemetry->isEnabled() && isCategoryEnabled(*telemetry, cat);
    if (!tracing)
        return {};

    // Assemble "<prefix>.<name>" in a single buffer.
    std::string fullName;
    fullName.reserve(prefix.size() + 1 + name.size());
    fullName.append(prefix);
    fullName.append(".");
    fullName.append(name);

    return SpanGuard(std::make_unique<Impl>(telemetry->startSpan(fullName)));
}
|
|
|
|
// ===== Child / linked span creation ========================================
|
|
|
|
/** Start a span parented to the calling thread's current runtime context.

    NOTE(review): the parent is taken from RuntimeContext::GetCurrent(),
    not from impl_->span directly; impl_ is only checked for null. Confirm
    that callers always invoke this while this guard's span is the active
    one on the calling thread.

    @param name Span name forwarded to Telemetry::startSpan().
    @return An active guard, or a null guard when this guard is null, no
            Telemetry instance exists, or telemetry is disabled.
*/
SpanGuard
SpanGuard::childSpan(std::string_view name) const
{
    if (!impl_)
        return {};

    auto* telemetry = Telemetry::getInstance();
    bool const usable = telemetry && telemetry->isEnabled();
    if (!usable)
        return {};

    auto activeCtx = opentelemetry::context::RuntimeContext::GetCurrent();
    auto implPtr = std::make_unique<Impl>(telemetry->startSpan(name, activeCtx));
    return SpanGuard(std::move(implPtr));
}
|
|
|
|
/** Start a span parented to a previously captured context.

    @param name      Span name forwarded to Telemetry::startSpan().
    @param parentCtx Captured context whose stored OTel context becomes
                     the parent.
    @return An active guard, or a null guard when parentCtx is invalid,
            no Telemetry instance exists, or telemetry is disabled.
*/
SpanGuard
SpanGuard::childSpan(std::string_view name, SpanContext const& parentCtx)
{
    if (!parentCtx.isValid())
        return {};

    auto* telemetry = Telemetry::getInstance();
    bool const usable = telemetry && telemetry->isEnabled();
    if (!usable)
        return {};

    auto implPtr = std::make_unique<Impl>(telemetry->startSpan(name, parentCtx.impl_->ctx));
    return SpanGuard(std::move(implPtr));
}
|
|
|
|
/** Start a new root span carrying a follows-from link to this guard's span.

    The span is created through the "xrpld" tracer with a parent context
    flagged as root, and a single link whose attribute is
    {attr::linkType, attr_val::followsFrom}.

    @param name Name of the new span.
    @return An active guard, or a null guard when this guard is null, no
            Telemetry instance exists, or telemetry is disabled.
*/
SpanGuard
SpanGuard::linkedSpan(std::string_view name) const
{
    if (!impl_)
        return {};

    auto* telemetry = Telemetry::getInstance();
    bool const usable = telemetry && telemetry->isEnabled();
    if (!usable)
        return {};

    auto tracer = telemetry->getTracer("xrpld");
    auto const linkedCtx = impl_->span->GetContext();

    // Mark as root span so it starts a new trace sub-tree rather than
    // inheriting the current thread's active span as parent.
    otel_trace::StartSpanOptions rootOptions;
    rootOptions.parent =
        opentelemetry::context::Context{}.SetValue(otel_trace::kIsRootSpanKey, true);

    // Named strings back the views handed to StartSpan for the whole call.
    std::string const spanName(name);
    std::string const linkKey(attr::linkType);
    std::string const linkValue(attr_val::followsFrom);

    auto started =
        tracer->StartSpan(spanName, {}, {{linkedCtx, {{linkKey, linkValue}}}}, rootOptions);
    return SpanGuard(std::make_unique<Impl>(std::move(started)));
}
|
|
|
|
/** Start a new root span carrying a follows-from link to a captured context.

    The span stored in linkCtx supplies the linked SpanContext. The new
    span is created through the "xrpld" tracer with a parent context
    flagged as root, and a single link whose attribute is
    {attr::linkType, attr_val::followsFrom}.

    @param name    Name of the new span.
    @param linkCtx Captured context holding the span to link to.
    @return An active guard, or a null guard when linkCtx is invalid, no
            Telemetry instance exists, telemetry is disabled, or the
            captured context yields no span with a valid SpanContext.
*/
SpanGuard
SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
{
    if (!linkCtx.isValid())
        return {};

    auto* telemetry = Telemetry::getInstance();
    bool const usable = telemetry && telemetry->isEnabled();
    if (!usable)
        return {};

    auto tracer = telemetry->getTracer("xrpld");

    // Pull the span out of the captured context to reach its SpanContext.
    auto linkSpan = otel_trace::GetSpan(linkCtx.impl_->ctx);
    if (!linkSpan)
        return {};
    auto const linkedCtx = linkSpan->GetContext();
    if (!linkedCtx.IsValid())
        return {};

    // Mark as root span so it starts a new trace sub-tree rather than
    // inheriting the current thread's active span as parent.
    otel_trace::StartSpanOptions rootOptions;
    rootOptions.parent =
        opentelemetry::context::Context{}.SetValue(otel_trace::kIsRootSpanKey, true);

    // Named strings back the views handed to StartSpan for the whole call.
    std::string const spanName(name);
    std::string const linkKey(attr::linkType);
    std::string const linkValue(attr_val::followsFrom);

    auto started =
        tracer->StartSpan(spanName, {}, {{linkedCtx, {{linkKey, linkValue}}}}, rootOptions);
    return SpanGuard(std::make_unique<Impl>(std::move(started)));
}
|
|
|
|
// ===== Transaction span with hash-derived trace ID ========================
|
|
|
|
/** Start a transaction span whose trace ID is the first 16 bytes of a hash.

    A synthetic, sampled, non-remote parent SpanContext is built from the
    hash-derived trace ID and a span ID drawn from default_prng(); the new
    span named "<prefix>.<name>" is started under that parent.

    @param prefix   First part of the span name.
    @param name     Second part of the span name.
    @param hashData Pointer to the hash bytes; the first 16 are used.
    @param hashSize Number of readable bytes at hashData.
    @return An active guard, or a null guard when hashData is null,
            hashSize is below 16, no Telemetry instance exists, telemetry
            is disabled, or transaction tracing is disabled.
*/
SpanGuard
SpanGuard::txSpan(
    std::string_view prefix,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize)
{
    // A trace ID needs 16 readable bytes: reject null or short buffers
    // before anything dereferences hashData.
    if (hashData == nullptr || hashSize < 16)
        return {};
    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    auto const rval = default_prng()();
    std::uint8_t spanIdBytes[8];
    // The memcpy below reads sizeof(spanIdBytes) bytes out of rval.
    static_assert(
        sizeof(rval) >= sizeof(spanIdBytes),
        "default_prng() result must supply at least 8 bytes for a span ID");
    std::memcpy(spanIdBytes, &rval, sizeof(spanIdBytes));
    otel_trace::SpanId spanId(opentelemetry::nostd::span<std::uint8_t const, 8>(spanIdBytes, 8));

    otel_trace::SpanContext syntheticCtx(
        traceId,
        spanId,
        otel_trace::TraceFlags(otel_trace::TraceFlags::kIsSampled),
        /* remote = */ false);

    // DefaultSpan only carries the SpanContext; it serves as the parent
    // entry in the context handed to startSpan().
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(syntheticCtx)));

    auto fullName = std::string(prefix) + "." + std::string(name);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
|
|
|
|
/** Start a transaction span under a remote parent identified by hash + span ID.

    The parent SpanContext is built from the first 16 bytes of the hash
    (trace ID), the supplied 8-byte parent span ID, and the supplied trace
    flags, and is marked remote. The new span named "<prefix>.<name>" is
    started under that parent.

    @param prefix         First part of the span name.
    @param name           Second part of the span name.
    @param hashData       Pointer to the hash bytes; the first 16 are used.
    @param hashSize       Number of readable bytes at hashData.
    @param parentSpanId   Pointer to the parent span ID bytes.
    @param parentSpanSize Number of bytes at parentSpanId; must be exactly 8.
    @param traceFlags     Raw trace-flags byte for the parent context.
    @return An active guard, or a null guard when either pointer is null,
            hashSize is below 16, parentSpanSize is not 8, no Telemetry
            instance exists, telemetry is disabled, or transaction tracing
            is disabled.
*/
SpanGuard
SpanGuard::txSpan(
    std::string_view prefix,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize,
    std::uint8_t const* parentSpanId,
    std::size_t parentSpanSize,
    std::uint8_t traceFlags)
{
    // Both buffers are read below; reject null pointers as well as
    // wrongly sized inputs before touching them.
    if (hashData == nullptr || parentSpanId == nullptr)
        return {};
    if (hashSize < 16 || parentSpanSize != 8)
        return {};
    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    otel_trace::SpanId parentSpan(
        opentelemetry::nostd::span<std::uint8_t const, 8>(parentSpanId, 8));

    otel_trace::SpanContext combinedCtx(
        traceId, parentSpan, otel_trace::TraceFlags(traceFlags), /* remote = */ true);

    // DefaultSpan only carries the SpanContext; it serves as the parent
    // entry in the context handed to startSpan().
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(combinedCtx)));

    auto fullName = std::string(prefix) + "." + std::string(name);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
|
|
|
|
// ===== Hash-derived span (generic, category-gated) =========================
|
|
|
|
/** Start a category-gated span whose trace ID is the first 16 bytes of a hash.

    A synthetic, sampled, non-remote parent SpanContext is built from the
    hash-derived trace ID and a span ID drawn from default_prng() (the
    same source txSpan() uses); the new span is started under that parent.

    @param cat      Category that must be enabled for a span to be created.
    @param name     Span name.
    @param hashData Pointer to the hash bytes; the first 16 are used.
    @param hashSize Number of readable bytes at hashData.
    @return An active guard, or a null guard when hashData is null,
            hashSize is below 16, no Telemetry instance exists, telemetry
            is disabled, or the category is disabled.
*/
SpanGuard
SpanGuard::hashSpan(
    TraceCategory cat,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize)
{
    // A trace ID needs 16 readable bytes: reject null or short buffers
    // before anything dereferences hashData.
    if (hashData == nullptr || hashSize < 16)
        return {};
    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !isCategoryEnabled(*tel, cat))
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    // Draw the span ID from default_prng() instead of constructing a
    // std::random_device on every call.
    auto const rval = default_prng()();
    std::uint8_t spanIdBytes[8];
    // The memcpy below reads sizeof(spanIdBytes) bytes out of rval.
    static_assert(
        sizeof(rval) >= sizeof(spanIdBytes),
        "default_prng() result must supply at least 8 bytes for a span ID");
    std::memcpy(spanIdBytes, &rval, sizeof(spanIdBytes));
    otel_trace::SpanId spanId(opentelemetry::nostd::span<std::uint8_t const, 8>(spanIdBytes, 8));

    otel_trace::SpanContext syntheticCtx(
        traceId,
        spanId,
        otel_trace::TraceFlags(otel_trace::TraceFlags::kIsSampled),
        /* remote = */ false);

    // DefaultSpan only carries the SpanContext; it serves as the parent
    // entry in the context handed to startSpan().
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(syntheticCtx)));

    return SpanGuard(std::make_unique<Impl>(tel->startSpan(std::string(name), parentCtx)));
}
|
|
|
|
// ===== Context capture =====================================================
|
|
|
|
/** Snapshot the calling thread's current runtime context.

    NOTE(review): the snapshot is RuntimeContext::GetCurrent(), not a
    context built from impl_->span; impl_ is only checked for null.

    @return A SpanContext wrapping a copy of the current context, or a
            default-constructed SpanContext when this guard is null.
*/
SpanContext
SpanGuard::captureContext() const
{
    if (impl_)
    {
        auto current = opentelemetry::context::RuntimeContext::GetCurrent();
        return SpanContext(std::make_shared<SpanContext::Impl>(current));
    }
    return {};
}
|
|
|
|
// ===== Attribute setters ===================================================
|
|
|
|
void
|
|
SpanGuard::setAttribute(std::string_view key, std::string_view value)
|
|
{
|
|
if (impl_)
|
|
impl_->span->SetAttribute(
|
|
opentelemetry::nostd::string_view(key.data(), key.size()),
|
|
opentelemetry::nostd::string_view(value.data(), value.size()));
|
|
}
|
|
|
|
void
|
|
SpanGuard::setAttribute(std::string_view key, char const* value)
|
|
{
|
|
setAttribute(key, std::string_view(value));
|
|
}
|
|
|
|
void
|
|
SpanGuard::setAttribute(std::string_view key, std::int64_t value)
|
|
{
|
|
if (impl_)
|
|
impl_->span->SetAttribute(opentelemetry::nostd::string_view(key.data(), key.size()), value);
|
|
}
|
|
|
|
void
|
|
SpanGuard::setAttribute(std::string_view key, double value)
|
|
{
|
|
if (impl_)
|
|
impl_->span->SetAttribute(opentelemetry::nostd::string_view(key.data(), key.size()), value);
|
|
}
|
|
|
|
void
|
|
SpanGuard::setAttribute(std::string_view key, bool value)
|
|
{
|
|
if (impl_)
|
|
impl_->span->SetAttribute(opentelemetry::nostd::string_view(key.data(), key.size()), value);
|
|
}
|
|
|
|
// ===== Status / events =====================================================
|
|
|
|
void
|
|
SpanGuard::setOk()
|
|
{
|
|
if (impl_)
|
|
impl_->span->SetStatus(otel_trace::StatusCode::kOk);
|
|
}
|
|
|
|
void
|
|
SpanGuard::setError(std::string_view description)
|
|
{
|
|
if (impl_)
|
|
impl_->span->SetStatus(otel_trace::StatusCode::kError, std::string(description));
|
|
}
|
|
|
|
void
|
|
SpanGuard::addEvent(std::string_view name)
|
|
{
|
|
if (impl_)
|
|
impl_->span->AddEvent(std::string(name));
|
|
}
|
|
|
|
void
|
|
SpanGuard::addEvent(std::string_view name, std::initializer_list<EventAttribute> attrs)
|
|
{
|
|
if (!impl_)
|
|
return;
|
|
// Own the strings to ensure lifetime safety through the AddEvent call.
|
|
std::vector<std::pair<std::string, std::string>> owned;
|
|
owned.reserve(attrs.size());
|
|
for (auto const& [k, v] : attrs)
|
|
owned.emplace_back(std::string(k), std::string(v));
|
|
impl_->span->AddEvent(std::string(name), owned);
|
|
}
|
|
|
|
void
|
|
SpanGuard::recordException(std::exception const& e)
|
|
{
|
|
if (!impl_)
|
|
return;
|
|
impl_->span->AddEvent(
|
|
"exception",
|
|
{{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}});
|
|
impl_->span->SetStatus(otel_trace::StatusCode::kError, e.what());
|
|
}
|
|
|
|
void
|
|
SpanGuard::discard()
|
|
{
|
|
if (impl_)
|
|
{
|
|
tl_discardCurrentSpan = true;
|
|
impl_->span->End();
|
|
impl_->span = nullptr; // prevent ~Impl from calling End() again
|
|
impl_.reset();
|
|
}
|
|
}
|
|
|
|
} // namespace telemetry
|
|
} // namespace xrpl
|
|
|
|
#endif // XRPL_ENABLE_TELEMETRY
|