mirror of
https://github.com/XRPLF/rippled.git
synced 2026-04-29 15:37:57 +00:00
feat(telemetry): add Phase 4 consensus tracing with SpanGuard API
Instrument the consensus subsystem with OpenTelemetry spans covering
the full round lifecycle: round start, establish phase, proposal send,
ledger close, position updates, consensus check, accept, validation
send, and mode changes.
Key design choices adapted from the original Phase 4 implementation
to the new SpanGuard factory pattern introduced in Phase 3:
- Add SpanGuard::hashSpan() for category-gated hash-derived trace IDs
(consensus round spans share trace_id across validators via ledger hash)
- Add SpanGuard::addEvent() overload with key-value attribute pairs
(used for dispute.resolve events during position updates)
- Add ConsensusSpanNames.h with compile-time span name constants
following the colocated *SpanNames.h pattern from Phase 3
- Add consensusTraceStrategy config option ("deterministic"/"attribute")
for cross-node trace correlation strategy selection
- Use SpanGuard::linkedSpan() for follows-from relationships between
consecutive rounds and cross-thread validation spans
- Use SpanGuard::captureContext() for thread-safe context propagation
from consensus thread to jtACCEPT worker thread
Spans produced: consensus.round, consensus.proposal.send,
consensus.ledger_close, consensus.establish, consensus.update_positions,
consensus.check, consensus.accept, consensus.accept.apply,
consensus.validation.send, consensus.mode_change
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -119,8 +119,10 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <exception>
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
@@ -133,6 +135,11 @@ namespace telemetry {
|
||||
*/
|
||||
enum class TraceCategory { Rpc, Transactions, Consensus, Peer, Ledger };
|
||||
|
||||
/** Key-value pair for span event attributes.
|
||||
Used by addEvent(name, attrs) to attach structured metadata to events.
The first member is the attribute key and the second is its value.
Both members are std::string_view, so an EventAttribute does not own
the text it refers to.
NOTE(review): confirm how long the referenced strings must stay alive
after being handed to addEvent.
|
||||
*/
|
||||
using EventAttribute = std::pair<std::string_view, std::string_view>;
|
||||
|
||||
/** Opaque wrapper for an OTel context snapshot.
|
||||
|
||||
Used to propagate trace context across threads. Created by
|
||||
@@ -285,6 +292,25 @@ public:
|
||||
std::size_t parentSpanSize,
|
||||
std::uint8_t traceFlags);
|
||||
|
||||
// --- Hash-derived span (generic, category-gated) --------------------
|
||||
|
||||
/** Create a span whose trace_id is derived from arbitrary hash data.
|
||||
trace_id = hashData[0:16], span_id = random. Gated by the given
|
||||
TraceCategory so the span is only created if that category is on.
|
||||
Used for consensus round spans (ledger hash) and any future
|
||||
subsystem that needs deterministic cross-node trace correlation.
|
||||
@param cat Trace subsystem category.
|
||||
@param name Full span name (e.g. "consensus.round").
|
||||
@param hashData Pointer to at least 16 bytes of hash data.
|
||||
@param hashSize Size of the hash buffer (must be >= 16).
The size is a byte count, since hashData points to std::uint8_t.
@return A SpanGuard for the new span.
NOTE(review): the result when the category is off or hashSize < 16
is not visible from this declaration. Confirm that the
implementation returns a null guard and never reads past hashSize.
|
||||
*/
|
||||
static SpanGuard
|
||||
hashSpan(
|
||||
TraceCategory cat,
|
||||
std::string_view name,
|
||||
std::uint8_t const* hashData,
|
||||
std::size_t hashSize);
|
||||
|
||||
// --- Context capture -----------------------------------------------
|
||||
|
||||
/** Snapshot the current thread's OTel context for cross-thread use.
|
||||
@@ -333,6 +359,14 @@ public:
|
||||
void
|
||||
addEvent(std::string_view name);
|
||||
|
||||
/** Add a named event with key-value attributes to the span's timeline.
|
||||
No-op on a null guard.
|
||||
@param name Event name.
|
||||
@param attrs Attribute pairs (all string_view for simplicity).
Each pair is (key, value). The views do not own their text.
NOTE(review): confirm the implementation copies the
attribute text before returning and does not keep the views.
|
||||
*/
|
||||
void
|
||||
addEvent(std::string_view name, std::initializer_list<EventAttribute> attrs);
|
||||
|
||||
/** Record an exception as a span event following OTel semantic
|
||||
conventions, and mark the span status as error.
|
||||
No-op on a null guard.
|
||||
@@ -420,6 +454,12 @@ public:
|
||||
return {};
|
||||
}
|
||||
|
||||
/** Inline hashSpan variant that ignores all four (unnamed) arguments
    and returns an empty-brace-initialized SpanGuard.
    NOTE(review): presumably the stub used when telemetry is compiled
    out. Confirm against the enclosing preprocessor guard, which is
    outside this view.
*/
static SpanGuard
|
||||
hashSpan(TraceCategory, std::string_view, std::uint8_t const*, std::size_t)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
SpanContext
|
||||
captureContext() const
|
||||
{
|
||||
@@ -460,6 +500,10 @@ public:
|
||||
{
|
||||
}
|
||||
/** Inline addEvent(name, attrs) variant with an empty body: the event
    name and the attribute list are both ignored.
    NOTE(review): presumably the stub used when telemetry is compiled
    out. Confirm against the enclosing preprocessor guard, which is
    outside this view.
*/
void
|
||||
addEvent(std::string_view, std::initializer_list<EventAttribute>)
|
||||
{
|
||||
}
|
||||
/** Inline recordException variant with an empty body: the exception
    passed in is ignored.
    NOTE(review): presumably the stub used when telemetry is compiled
    out. Confirm against the enclosing preprocessor guard, which is
    outside this view.
*/
void
|
||||
recordException(std::exception const&)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -188,6 +188,13 @@ public:
|
||||
|
||||
/** Enable tracing for ledger close/accept. Defaults to true. */
|
||||
bool traceLedger = true;
|
||||
|
||||
/** Strategy for cross-node consensus trace correlation.
|
||||
"deterministic" — derive trace_id from ledger hash so all
|
||||
validators in the same round share the same trace_id.
|
||||
"attribute" — random trace_id, correlate via ledger_id attribute.
Defaults to "deterministic".
NOTE(review): this is a free-form std::string. How values other
than the two listed above are handled is not visible here, so
confirm where the option is validated.
|
||||
*/
|
||||
std::string consensusTraceStrategy = "deterministic";
|
||||
};
|
||||
|
||||
virtual ~Telemetry() = default;
|
||||
@@ -245,6 +252,10 @@ public:
|
||||
virtual bool
|
||||
shouldTraceLedger() const = 0;
|
||||
|
||||
/** Pure virtual accessor for the consensus trace correlation strategy.

    @return Const reference to the configured strategy string.
    NOTE(review): because a reference is returned, the string must
    outlive every caller's use of it. Presumably it is owned by the
    implementation's stored configuration; confirm.
*/
|
||||
virtual std::string const&
|
||||
getConsensusTraceStrategy() const = 0;
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
/** Get or create a named tracer instance.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user