feat(telemetry): add hash-derived trace IDs for transaction spans

Derive trace_id from txHash[0:16] so all nodes handling the same
transaction produce spans under the same trace. Protobuf span_id
propagation provides parent-child relay ordering when available.

- Add SpanGuard::txSpan() factory methods (hash-derived trace ID)
- Add TxTracing.h helpers: txReceiveSpan(), txProcessSpan()
- Update PeerImp and NetworkOPs to use the new helpers

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-04-21 17:31:16 +01:00
parent 397c66cede
commit ded848075d
5 changed files with 204 additions and 11 deletions

View File

@@ -237,6 +237,46 @@ public:
[[nodiscard]] static SpanGuard
linkedSpan(std::string_view name, SpanContext const& linkCtx);
// --- Transaction span with hash-derived trace ID -------------------
/** Create a span whose trace_id is derived from a transaction hash.
trace_id = hashData[0:16], span_id = random. All nodes handling
the same transaction independently produce spans under the same
trace, enabling cross-node correlation without context propagation.
The returned guard owns the span; it is marked [[nodiscard]] (like
linkedSpan() and the inline stub overloads) because discarding it
would end the span immediately.
@param prefix Span name prefix (e.g. "tx").
@param name Span name suffix (e.g. "receive").
@param hashData Pointer to at least 16 bytes of hash data.
@param hashSize Size of the hash buffer (must be >= 16).
@return Guard for the new span; an empty guard if hashSize < 16 or
transaction tracing is not active.
*/
[[nodiscard]] static SpanGuard
txSpan(
std::string_view prefix,
std::string_view name,
std::uint8_t const* hashData,
std::size_t hashSize);
/** Create a span with hash-derived trace_id and a remote parent.
trace_id = hashData[0:16], parent span_id from protobuf context
propagation. Produces a child span of the sender's span while
sharing the deterministic trace_id.
The returned guard owns the span; it is marked [[nodiscard]] (like
linkedSpan() and the inline stub overloads) because discarding it
would end the span immediately.
@param prefix Span name prefix.
@param name Span name suffix.
@param hashData Pointer to at least 16 bytes of hash data.
@param hashSize Size of the hash buffer (must be >= 16).
@param parentSpanId Pointer to 8 bytes of parent span ID.
@param parentSpanSize Size of parent span ID buffer (must be 8).
@param traceFlags Trace flags from remote context.
@return Guard for the new span; an empty guard if a size requirement
is not met or transaction tracing is not active.
*/
[[nodiscard]] static SpanGuard
txSpan(
std::string_view prefix,
std::string_view name,
std::uint8_t const* hashData,
std::size_t hashSize,
std::uint8_t const* parentSpanId,
std::size_t parentSpanSize,
std::uint8_t traceFlags);
// --- Context capture -----------------------------------------------
/** Snapshot the current thread's OTel context for cross-thread use.
@@ -350,6 +390,24 @@ public:
return {};
}
/** Stub overload of the hash-derived transaction span factory.
Ignores every argument and hands back a value-initialized guard. */
[[nodiscard]] static SpanGuard
txSpan(
std::string_view /*prefix*/,
std::string_view /*name*/,
std::uint8_t const* /*hashData*/,
std::size_t /*hashSize*/)
{
return {};
}
/** Stub overload of the remote-parent transaction span factory.
Ignores every argument and hands back a value-initialized guard. */
[[nodiscard]] static SpanGuard
txSpan(
std::string_view /*prefix*/,
std::string_view /*name*/,
std::uint8_t const* /*hashData*/,
std::size_t /*hashSize*/,
std::uint8_t const* /*parentSpanId*/,
std::size_t /*parentSpanSize*/,
std::uint8_t /*traceFlags*/)
{
return {};
}
[[nodiscard]] SpanContext
captureContext() const
{

View File

@@ -28,12 +28,17 @@
#include <opentelemetry/context/runtime_context.h>
#include <opentelemetry/nostd/shared_ptr.h>
#include <opentelemetry/trace/context.h>
#include <opentelemetry/trace/default_span.h>
#include <opentelemetry/trace/provider.h>
#include <opentelemetry/trace/scope.h>
#include <opentelemetry/trace/span.h>
#include <opentelemetry/trace/span_context.h>
#include <opentelemetry/trace/span_startoptions.h>
#include <opentelemetry/trace/trace_flags.h>
#include <opentelemetry/trace/trace_id.h>
#include <opentelemetry/trace/tracer.h>
#include <random>
#include <string>
#include <utility>
@@ -226,6 +231,74 @@ SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
opts)));
}
// ===== Transaction span with hash-derived trace ID ========================
/** Start a span named "<prefix>.<name>" whose trace_id is hashData[0:16].

A synthetic, locally generated parent span context carries the
hash-derived trace_id into the tracer, so the new span is created as a
child of that synthetic context.

@param prefix Span name prefix.
@param name Span name suffix.
@param hashData Pointer to at least 16 bytes of hash data.
@param hashSize Size of the hash buffer (must be >= 16).
@return Guard for the new span; an empty guard if hashData is null,
hashSize < 16, or transaction tracing is not active.
*/
SpanGuard
SpanGuard::txSpan(
std::string_view prefix,
std::string_view name,
std::uint8_t const* hashData,
std::size_t hashSize)
{
// A null buffer cannot be read; treat it like a too-short hash.
if (hashData == nullptr || hashSize < 16)
return {};
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
return {};
otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));
// This runs once per traced transaction, so avoid constructing a
// std::random_device (and sampling it eight times) on every call.
// A per-thread engine seeded once is enough: the span ID only has to be
// unique, not unpredictable.
thread_local std::mt19937_64 spanIdEngine = [] {
std::random_device rd;
std::seed_seq seed{rd(), rd(), rd(), rd()};
return std::mt19937_64(seed);
}();
// An all-zero span ID is invalid and would invalidate the whole parent
// context, so draw again in that (vanishingly rare) case.
std::uint64_t spanIdBits = 0;
do
{
spanIdBits = spanIdEngine();
} while (spanIdBits == 0);
std::uint8_t spanIdBytes[8];
for (std::size_t i = 0; i < sizeof(spanIdBytes); ++i)
spanIdBytes[i] = static_cast<std::uint8_t>(spanIdBits >> (8 * i));
otel_trace::SpanId spanId(opentelemetry::nostd::span<std::uint8_t const, 8>(spanIdBytes, 8));
// TraceFlags(1): the synthetic parent is marked as sampled.
otel_trace::SpanContext syntheticCtx(
traceId, spanId, otel_trace::TraceFlags(1), /* remote = */ false);
// nostd::shared_ptr takes ownership of the DefaultSpan wrapper.
auto parentCtx = opentelemetry::context::Context{}.SetValue(
otel_trace::kSpanKey,
opentelemetry::nostd::shared_ptr<otel_trace::Span>(
new otel_trace::DefaultSpan(syntheticCtx)));
auto fullName = std::string(prefix) + "." + std::string(name);
return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
/** Start a span named "<prefix>.<name>" with trace_id = hashData[0:16]
and the given remote span as its parent.

@param prefix Span name prefix.
@param name Span name suffix.
@param hashData Pointer to at least 16 bytes of hash data.
@param hashSize Size of the hash buffer (must be >= 16).
@param parentSpanId Pointer to 8 bytes of parent span ID.
@param parentSpanSize Size of parent span ID buffer (must be 8).
@param traceFlags Trace flags from remote context.
@return Guard for the new span; an empty guard if a pointer is null, a
size requirement is not met, or transaction tracing is not active.
If the parent span ID is all zeros, the span is created via the
parent-less overload instead.
*/
SpanGuard
SpanGuard::txSpan(
std::string_view prefix,
std::string_view name,
std::uint8_t const* hashData,
std::size_t hashSize,
std::uint8_t const* parentSpanId,
std::size_t parentSpanSize,
std::uint8_t traceFlags)
{
// Null buffers cannot be read; treat them like wrongly sized ones.
if (hashData == nullptr || hashSize < 16)
return {};
if (parentSpanId == nullptr || parentSpanSize != 8)
return {};
auto* tel = Telemetry::getInstance();
if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
return {};
otel_trace::SpanId parentSpan(
opentelemetry::nostd::span<std::uint8_t const, 8>(parentSpanId, 8));
// The parent span ID comes from a remote peer. An all-zero ID makes the
// combined SpanContext invalid; the tracer would then presumably treat the
// parent as absent and assign a fresh random trace_id, losing the
// hash-derived one. Fall back to the parent-less overload, which keeps it.
if (!parentSpan.IsValid())
return txSpan(prefix, name, hashData, hashSize);
otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));
otel_trace::SpanContext combinedCtx(
traceId, parentSpan, otel_trace::TraceFlags(traceFlags), /* remote = */ true);
// nostd::shared_ptr takes ownership of the DefaultSpan wrapper.
auto parentCtx = opentelemetry::context::Context{}.SetValue(
otel_trace::kSpanKey,
opentelemetry::nostd::shared_ptr<otel_trace::Span>(
new otel_trace::DefaultSpan(combinedCtx)));
auto fullName = std::string(prefix) + "." + std::string(name);
return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
// ===== Context capture =====================================================
SpanContext

View File

@@ -35,6 +35,7 @@
#include <xrpld/rpc/MPTokenIssuanceID.h>
#include <xrpld/rpc/ServerHandler.h>
#include <xrpld/telemetry/TxSpanNames.h>
#include <xrpld/telemetry/TxTracing.h>
#include <xrpl/basics/Log.h>
#include <xrpl/basics/ToString.h>
@@ -1314,8 +1315,7 @@ NetworkOPsImp::processTransaction(
FailHard failType)
{
using namespace telemetry;
auto span =
SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::process);
auto span = txProcessSpan(transaction->getID());
span.setAttribute(tx_span::attr::hash, to_string(transaction->getID()).c_str());
span.setAttribute(tx_span::attr::local, bLocal);

View File

@@ -22,6 +22,7 @@
#include <xrpld/peerfinder/PeerfinderManager.h>
#include <xrpld/peerfinder/Slot.h>
#include <xrpld/telemetry/TxSpanNames.h>
#include <xrpld/telemetry/TxTracing.h>
#include <xrpl/basics/Log.h>
#include <xrpl/basics/SHAMapHash.h>
@@ -1423,21 +1424,12 @@ PeerImp::handleTransaction(
bool eraseTxQueue,
bool batch)
{
using namespace telemetry;
auto span =
SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::receive);
span.setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
if (auto const version = getVersion(); !version.empty())
span.setAttribute(tx_span::attr::peerVersion, version.c_str());
XRPL_ASSERT(eraseTxQueue != batch, ("xrpl::PeerImp::handleTransaction : valid inputs"));
if (tracking_.load() == Tracking::diverged)
return;
if (app_.getOPs().isNeedNetworkLedger())
{
// If we've never been in synch, there's nothing we can do
// with a transaction
JLOG(p_journal_.debug()) << "Ignoring incoming transaction: Need network ledger";
return;
}
@@ -1448,7 +1440,13 @@ PeerImp::handleTransaction(
{
auto stx = std::make_shared<STTx const>(sit);
uint256 const txID = stx->getTransactionID();
using namespace telemetry;
auto span = txReceiveSpan(txID, *m);
span.setAttribute(tx_span::attr::hash, to_string(txID).c_str());
span.setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
if (auto const version = getVersion(); !version.empty())
span.setAttribute(tx_span::attr::peerVersion, version.c_str());
// Charge strongly for attempting to relay a txn with tfInnerBatchTxn
// LCOV_EXCL_START

View File

@@ -0,0 +1,64 @@
#pragma once
/** Helper functions for creating transaction trace spans.
*
* Encapsulates the logic for creating SpanGuard instances with
* hash-derived trace IDs and optional protobuf parent extraction.
* Call sites in PeerImp and NetworkOPs stay simple one-liners.
*
* When XRPL_ENABLE_TELEMETRY is not defined, the functions return
* no-op SpanGuard instances (zero overhead, zero dependencies).
*/
#include <xrpld/telemetry/TxSpanNames.h>
#include <xrpl/basics/base_uint.h>
#include <xrpl/telemetry/SpanGuard.h>
#ifdef XRPL_ENABLE_TELEMETRY
#include <xrpl/proto/xrpl.pb.h>
#endif
namespace xrpl {
namespace telemetry {
/** Create a "tx.receive" span for a transaction received from a peer.
 * trace_id is derived from txID[0:16]. If the incoming message carries
 * a protobuf TraceContext with a valid span_id, it is used as the
 * parent to preserve relay ordering.
 *
 * The result is an owning guard, so it is marked [[nodiscard]] to match
 * the SpanGuard factories it wraps; discarding it would end the span
 * immediately.
 *
 * @param txID Transaction ID whose leading bytes become the trace_id.
 * @param msg Incoming message; only inspected when
 *            XRPL_ENABLE_TELEMETRY is defined.
 * @return Guard for the new span (possibly empty, see SpanGuard::txSpan).
 */
[[nodiscard]] inline SpanGuard
txReceiveSpan(uint256 const& txID, [[maybe_unused]] protocol::TMTransaction const& msg)
{
#ifdef XRPL_ENABLE_TELEMETRY
if (msg.has_trace_context())
{
auto const& tc = msg.trace_context();
// Only an exactly 8-byte span_id can serve as a parent span ID.
if (tc.has_span_id() && tc.span_id().size() == 8)
{
return SpanGuard::txSpan(
tx_span::prefix::tx,
tx_span::op::receive,
txID.data(),
txID.bytes,
reinterpret_cast<std::uint8_t const*>(tc.span_id().data()),
tc.span_id().size(),
// Missing trace_flags are treated as 0.
tc.has_trace_flags() ? static_cast<std::uint8_t>(tc.trace_flags())
: std::uint8_t{0});
}
}
#endif
// No usable remote parent: create the span from the hash alone.
return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::receive, txID.data(), txID.bytes);
}
/** Create a "tx.process" span for transaction processing in NetworkOPs.
 * trace_id is derived from txID[0:16].
 *
 * The result is an owning guard, so it is marked [[nodiscard]] to match
 * the SpanGuard factories it wraps; discarding it would end the span
 * immediately.
 *
 * @param txID Transaction ID whose leading bytes become the trace_id.
 * @return Guard for the new span (possibly empty, see SpanGuard::txSpan).
 */
[[nodiscard]] inline SpanGuard
txProcessSpan(uint256 const& txID)
{
return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::process, txID.data(), txID.bytes);
}
} // namespace telemetry
} // namespace xrpl