From ded848075dc2885af2be939e03084eccbd023140 Mon Sep 17 00:00:00 2001
From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
Date: Tue, 21 Apr 2026 17:31:16 +0100
Subject: [PATCH] feat(telemetry): add hash-derived trace IDs for transaction spans

Derive trace_id from txHash[0:16] so all nodes handling the same
transaction produce spans under the same trace. Protobuf span_id
propagation provides parent-child relay ordering when available.

- Add SpanGuard::txSpan() factory methods (hash-derived trace ID)
- Add TxTracing.h helpers: txReceiveSpan(), txProcessSpan()
- Update PeerImp and NetworkOPs to use the new helpers

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 include/xrpl/telemetry/SpanGuard.h   | 58 ++++++++++++++++++++++
 src/libxrpl/telemetry/SpanGuard.cpp  | 73 ++++++++++++++++++++++++++++
 src/xrpld/app/misc/NetworkOPs.cpp    |  4 +-
 src/xrpld/overlay/detail/PeerImp.cpp | 16 +++---
 src/xrpld/telemetry/TxTracing.h      | 64 ++++++++++++++++++++++++
 5 files changed, 204 insertions(+), 11 deletions(-)
 create mode 100644 src/xrpld/telemetry/TxTracing.h

diff --git a/include/xrpl/telemetry/SpanGuard.h b/include/xrpl/telemetry/SpanGuard.h
index 6718052219..47cd7b29cd 100644
--- a/include/xrpl/telemetry/SpanGuard.h
+++ b/include/xrpl/telemetry/SpanGuard.h
@@ -237,6 +237,46 @@ public:
     [[nodiscard]] static SpanGuard
     linkedSpan(std::string_view name, SpanContext const& linkCtx);
 
+    // --- Transaction span with hash-derived trace ID -------------------
+
+    /** Create a span whose trace_id is derived from a transaction hash.
+        trace_id = hashData[0:16]; parent = synthetic random span_id. Nodes
+        handling the same transaction independently produce spans under the
+        same trace, enabling cross-node correlation without propagation.
+        @param prefix    Span name prefix (e.g. "tx").
+        @param name      Span name suffix (e.g. "receive").
+        @param hashData  Pointer to at least 16 bytes of hash data.
+        @param hashSize  Size of the hash buffer (must be >= 16).
+    */
+    [[nodiscard]] static SpanGuard
+    txSpan(
+        std::string_view prefix,
+        std::string_view name,
+        std::uint8_t const* hashData,
+        std::size_t hashSize);
+
+    /** Create a span with hash-derived trace_id and a remote parent.
+        trace_id = hashData[0:16], parent span_id from protobuf context
+        propagation. Produces a child span of the sender's span while
+        sharing the deterministic trace_id.
+        @param prefix         Span name prefix.
+        @param name           Span name suffix.
+        @param hashData       Pointer to at least 16 bytes of hash data.
+        @param hashSize       Size of the hash buffer (must be >= 16).
+        @param parentSpanId   Pointer to 8 bytes of parent span ID.
+        @param parentSpanSize Size of parent span ID buffer (must be 8).
+        @param traceFlags     Trace flags from remote context.
+    */
+    [[nodiscard]] static SpanGuard
+    txSpan(
+        std::string_view prefix,
+        std::string_view name,
+        std::uint8_t const* hashData,
+        std::size_t hashSize,
+        std::uint8_t const* parentSpanId,
+        std::size_t parentSpanSize,
+        std::uint8_t traceFlags);
+
     // --- Context capture -----------------------------------------------
 
     /** Snapshot the current thread's OTel context for cross-thread use.
@@ -350,6 +390,24 @@ public:
         return {};
     }
 
+    [[nodiscard]] static SpanGuard
+    txSpan(std::string_view, std::string_view, std::uint8_t const*, std::size_t)
+    {
+        return {};
+    }
+    [[nodiscard]] static SpanGuard
+    txSpan(
+        std::string_view,
+        std::string_view,
+        std::uint8_t const*,
+        std::size_t,
+        std::uint8_t const*,
+        std::size_t,
+        std::uint8_t)
+    {
+        return {};
+    }
+
     [[nodiscard]] SpanContext
     captureContext() const
     {
diff --git a/src/libxrpl/telemetry/SpanGuard.cpp b/src/libxrpl/telemetry/SpanGuard.cpp
index 4332f0f7b5..22f25ae05a 100644
--- a/src/libxrpl/telemetry/SpanGuard.cpp
+++ b/src/libxrpl/telemetry/SpanGuard.cpp
@@ -28,12 +28,17 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 
+#include
+#include
 #include
+#include
 #include
 #include
 
@@ -226,6 +231,74 @@ SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
         opts)));
 }
 
+// ===== Transaction span with hash-derived trace ID ========================
+
+SpanGuard
+SpanGuard::txSpan(
+    std::string_view prefix,
+    std::string_view name,
+    std::uint8_t const* hashData,
+    std::size_t hashSize)
+{
+    if (!hashData || hashSize < 16)  // need 16 readable bytes for the trace ID
+        return {};
+    auto* tel = Telemetry::getInstance();
+    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
+        return {};
+
+    otel_trace::TraceId traceId(opentelemetry::nostd::span(hashData, 16));
+
+    std::uint8_t spanIdBytes[8];
+    thread_local std::random_device rd;  // constructed once per thread, not once per span
+    for (auto& b : spanIdBytes)
+        b = static_cast(rd());
+    otel_trace::SpanId spanId(opentelemetry::nostd::span(spanIdBytes, 8));
+
+    otel_trace::SpanContext syntheticCtx(
+        traceId, spanId, otel_trace::TraceFlags(1), /* remote = */ false);
+
+    auto parentCtx = opentelemetry::context::Context{}.SetValue(
+        otel_trace::kSpanKey,
+        opentelemetry::nostd::shared_ptr(
+            new otel_trace::DefaultSpan(syntheticCtx)));
+
+    auto fullName = std::string(prefix) + "." + std::string(name);
+    return SpanGuard(std::make_unique(tel->startSpan(fullName, parentCtx)));
+}
+
+SpanGuard
+SpanGuard::txSpan(
+    std::string_view prefix,
+    std::string_view name,
+    std::uint8_t const* hashData,
+    std::size_t hashSize,
+    std::uint8_t const* parentSpanId,
+    std::size_t parentSpanSize,
+    std::uint8_t traceFlags)
+{
+    if (!hashData || !parentSpanId || hashSize < 16 || parentSpanSize != 8)
+        return {};
+    auto* tel = Telemetry::getInstance();
+    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
+        return {};
+
+    otel_trace::TraceId traceId(opentelemetry::nostd::span(hashData, 16));
+
+    otel_trace::SpanId parentSpan(
+        opentelemetry::nostd::span(parentSpanId, 8));
+
+    otel_trace::SpanContext combinedCtx(
+        traceId, parentSpan, otel_trace::TraceFlags(traceFlags), /* remote = */ true);
+
+    auto parentCtx = opentelemetry::context::Context{}.SetValue(
+        otel_trace::kSpanKey,
+        opentelemetry::nostd::shared_ptr(
+            new otel_trace::DefaultSpan(combinedCtx)));
+
+    auto fullName = std::string(prefix) + "." + std::string(name);
+    return SpanGuard(std::make_unique(tel->startSpan(fullName, parentCtx)));
+}
+
 // ===== Context capture =====================================================
 
 SpanContext
diff --git a/src/xrpld/app/misc/NetworkOPs.cpp b/src/xrpld/app/misc/NetworkOPs.cpp
index b02e4c4cf7..a7eb131514 100644
--- a/src/xrpld/app/misc/NetworkOPs.cpp
+++ b/src/xrpld/app/misc/NetworkOPs.cpp
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -1314,8 +1315,7 @@ NetworkOPsImp::processTransaction(
     FailHard failType)
 {
     using namespace telemetry;
-    auto span =
-        SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::process);
+    auto span = txProcessSpan(transaction->getID());
     span.setAttribute(tx_span::attr::hash, to_string(transaction->getID()).c_str());
     span.setAttribute(tx_span::attr::local, bLocal);
 
diff --git a/src/xrpld/overlay/detail/PeerImp.cpp b/src/xrpld/overlay/detail/PeerImp.cpp
index 4c4b6acc92..442f9fe194 100644
--- a/src/xrpld/overlay/detail/PeerImp.cpp
+++ b/src/xrpld/overlay/detail/PeerImp.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -1423,21 +1424,12 @@ PeerImp::handleTransaction(
     bool eraseTxQueue,
     bool batch)
 {
-    using namespace telemetry;
-    auto span =
-        SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::receive);
-    span.setAttribute(tx_span::attr::peerId, static_cast(id_));
-    if (auto const version = getVersion(); !version.empty())
-        span.setAttribute(tx_span::attr::peerVersion, version.c_str());
-
     XRPL_ASSERT(eraseTxQueue != batch, ("xrpl::PeerImp::handleTransaction : valid inputs"));
     if (tracking_.load() == Tracking::diverged)
         return;
 
     if (app_.getOPs().isNeedNetworkLedger())
     {
-        // If we've never been in synch, there's nothing we can do
-        // with a transaction
         JLOG(p_journal_.debug()) << "Ignoring incoming transaction: Need network ledger";
         return;
     }
@@ -1448,7 +1440,13 @@ PeerImp::handleTransaction(
     {
         auto stx = std::make_shared(sit);
         uint256 const txID = stx->getTransactionID();
+
+        using namespace telemetry;
+        auto span = txReceiveSpan(txID, *m);
         span.setAttribute(tx_span::attr::hash, to_string(txID).c_str());
+        span.setAttribute(tx_span::attr::peerId, static_cast(id_));
+        if (auto const version = getVersion(); !version.empty())
+            span.setAttribute(tx_span::attr::peerVersion, version.c_str());
 
         // Charge strongly for attempting to relay a txn with tfInnerBatchTxn
         // LCOV_EXCL_START
diff --git a/src/xrpld/telemetry/TxTracing.h b/src/xrpld/telemetry/TxTracing.h
new file mode 100644
index 0000000000..e8f4d9f281
--- /dev/null
+++ b/src/xrpld/telemetry/TxTracing.h
@@ -0,0 +1,64 @@
+#pragma once
+
+/** Helper functions for creating transaction trace spans.
+ *
+ *  Encapsulates the logic for creating SpanGuard instances with
+ *  hash-derived trace IDs and optional protobuf parent extraction.
+ *  Call sites in PeerImp and NetworkOPs stay simple one-liners.
+ *
+ *  When XRPL_ENABLE_TELEMETRY is not defined, the functions return
+ *  no-op SpanGuard instances (zero overhead, zero dependencies).
+ */
+
+#include
+
+#include
+#include
+
+#ifdef XRPL_ENABLE_TELEMETRY
+#include
+#endif
+
+namespace xrpl {
+namespace telemetry {
+
+/** Create a "tx.receive" span for a transaction received from a peer.
+ *  trace_id is derived from txID[0:16]. If the incoming message carries
+ *  a protobuf TraceContext with a valid span_id, it is used as the
+ *  parent to preserve relay ordering.
+ */
+inline SpanGuard
+txReceiveSpan(uint256 const& txID, [[maybe_unused]] protocol::TMTransaction const& msg)
+{
+#ifdef XRPL_ENABLE_TELEMETRY
+    if (msg.has_trace_context())
+    {
+        auto const& tc = msg.trace_context();
+        if (tc.has_span_id() && tc.span_id().size() == 8)
+        {
+            return SpanGuard::txSpan(
+                tx_span::prefix::tx,
+                tx_span::op::receive,
+                txID.data(),
+                txID.bytes,
+                reinterpret_cast(tc.span_id().data()),
+                tc.span_id().size(),
+                tc.has_trace_flags() ? static_cast(tc.trace_flags())
+                                     : std::uint8_t{0});
+        }
+    }
+#endif
+    return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::receive, txID.data(), txID.bytes);
+}
+
+/** Create a "tx.process" span for transaction processing in NetworkOPs.
+ *  trace_id is derived from txID[0:16].
+ */
+inline SpanGuard
+txProcessSpan(uint256 const& txID)
+{
+    return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::process, txID.data(), txID.bytes);
+}
+
+}  // namespace telemetry
+}  // namespace xrpl