mirror of
https://github.com/XRPLF/rippled.git
synced 2026-04-29 15:37:57 +00:00
feat(telemetry): add hash-derived trace IDs for transaction spans
Derive trace_id from txHash[0:16] so all nodes handling the same transaction produce spans under the same trace. Protobuf span_id propagation provides parent-child relay ordering when available.
- Add SpanGuard::txSpan() factory methods (hash-derived trace ID)
- Add TxTracing.h helpers: txReceiveSpan(), txProcessSpan()
- Update PeerImp and NetworkOPs to use the new helpers
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -237,6 +237,46 @@ public:
|
||||
[[nodiscard]] static SpanGuard
|
||||
linkedSpan(std::string_view name, SpanContext const& linkCtx);
|
||||
|
||||
// --- Transaction span with hash-derived trace ID -------------------
|
||||
|
||||
/** Create a span whose trace_id is derived from a transaction hash.
|
||||
trace_id = hashData[0:16], span_id = random. All nodes handling
|
||||
the same transaction independently produce spans under the same
|
||||
trace, enabling cross-node correlation without context propagation.
|
||||
@param prefix Span name prefix (e.g. "tx").
|
||||
@param name Span name suffix (e.g. "receive").
|
||||
@param hashData Pointer to at least 16 bytes of hash data.
|
||||
@param hashSize Size of the hash buffer (must be >= 16).
|
||||
*/
|
||||
static SpanGuard
|
||||
txSpan(
|
||||
std::string_view prefix,
|
||||
std::string_view name,
|
||||
std::uint8_t const* hashData,
|
||||
std::size_t hashSize);
|
||||
|
||||
/** Create a span with hash-derived trace_id and a remote parent.
|
||||
trace_id = hashData[0:16], parent span_id from protobuf context
|
||||
propagation. Produces a child span of the sender's span while
|
||||
sharing the deterministic trace_id.
|
||||
@param prefix Span name prefix.
|
||||
@param name Span name suffix.
|
||||
@param hashData Pointer to at least 16 bytes of hash data.
|
||||
@param hashSize Size of the hash buffer (must be >= 16).
|
||||
@param parentSpanId Pointer to 8 bytes of parent span ID.
|
||||
@param parentSpanSize Size of parent span ID buffer (must be 8).
|
||||
@param traceFlags Trace flags from remote context.
|
||||
*/
|
||||
static SpanGuard
|
||||
txSpan(
|
||||
std::string_view prefix,
|
||||
std::string_view name,
|
||||
std::uint8_t const* hashData,
|
||||
std::size_t hashSize,
|
||||
std::uint8_t const* parentSpanId,
|
||||
std::size_t parentSpanSize,
|
||||
std::uint8_t traceFlags);
|
||||
|
||||
// --- Context capture -----------------------------------------------
|
||||
|
||||
/** Snapshot the current thread's OTel context for cross-thread use.
|
||||
@@ -350,6 +390,24 @@ public:
|
||||
return {};
|
||||
}
|
||||
|
||||
/** No-op stand-in for the hash-derived transaction span factory.

    Ignores every argument and hands back a default-constructed
    SpanGuard.
*/
[[nodiscard]] static SpanGuard
txSpan(
    std::string_view /* prefix */,
    std::string_view /* name */,
    std::uint8_t const* /* hashData */,
    std::size_t /* hashSize */)
{
    return {};
}
|
||||
/** No-op stand-in for the remote-parent transaction span factory.

    Ignores every argument and hands back a default-constructed
    SpanGuard.
*/
[[nodiscard]] static SpanGuard
txSpan(
    std::string_view /* prefix */,
    std::string_view /* name */,
    std::uint8_t const* /* hashData */,
    std::size_t /* hashSize */,
    std::uint8_t const* /* parentSpanId */,
    std::size_t /* parentSpanSize */,
    std::uint8_t /* traceFlags */)
{
    return {};
}
|
||||
|
||||
[[nodiscard]] SpanContext
|
||||
captureContext() const
|
||||
{
|
||||
|
||||
@@ -28,12 +28,17 @@
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/context.h>
|
||||
#include <opentelemetry/trace/default_span.h>
|
||||
#include <opentelemetry/trace/provider.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
#include <opentelemetry/trace/span_context.h>
|
||||
#include <opentelemetry/trace/span_startoptions.h>
|
||||
#include <opentelemetry/trace/trace_flags.h>
|
||||
#include <opentelemetry/trace/trace_id.h>
|
||||
#include <opentelemetry/trace/tracer.h>
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
@@ -226,6 +231,74 @@ SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
|
||||
opts)));
|
||||
}
|
||||
|
||||
// ===== Transaction span with hash-derived trace ID ========================
|
||||
|
||||
/** Start a span whose trace_id is the first 16 bytes of a hash.

    The span is named "<prefix>.<name>". A synthetic, non-remote span
    context (hash-derived trace_id, random span_id, sampled flag set) is
    installed as the active span of a fresh context, and that context is
    handed to Telemetry::startSpan().
    NOTE(review): presumably startSpan() treats that context as the parent,
    so the new span inherits the trace_id -- confirm against Telemetry.

    @param prefix   Span name prefix.
    @param name     Span name suffix.
    @param hashData Pointer to at least 16 bytes of hash data.
    @param hashSize Size of the hash buffer (must be >= 16).
    @return A guard owning the started span, or a no-op guard when the
            hash buffer is null or too short, or when telemetry is
            unavailable, disabled, or not tracing transactions.
*/
SpanGuard
SpanGuard::txSpan(
    std::string_view prefix,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize)
{
    // A full 16-byte trace_id must be readable from the buffer.
    if (hashData == nullptr || hashSize < 16)
        return {};

    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    // One engine per thread, seeded once from the system entropy source.
    // Constructing std::random_device on every call (and drawing from it
    // once per byte) is needlessly expensive on the transaction path.
    thread_local std::mt19937_64 engine = [] {
        std::random_device entropy;
        std::seed_seq seed{entropy(), entropy(), entropy(), entropy()};
        return std::mt19937_64(seed);
    }();

    // An all-zero span_id is the "invalid" value; redraw if it comes up.
    auto raw = engine();
    while (raw == 0)
        raw = engine();

    std::uint8_t spanIdBytes[8];
    for (std::size_t i = 0; i < sizeof(spanIdBytes); ++i)
        spanIdBytes[i] = static_cast<std::uint8_t>(raw >> (8 * i));
    otel_trace::SpanId spanId(opentelemetry::nostd::span<std::uint8_t const, 8>(spanIdBytes, 8));

    // TraceFlags(1): the sampled bit is always set for the synthetic context.
    otel_trace::SpanContext syntheticCtx(
        traceId, spanId, otel_trace::TraceFlags(1), /* remote = */ false);

    // nostd::shared_ptr takes ownership of the raw DefaultSpan pointer.
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(syntheticCtx)));

    auto fullName = std::string(prefix) + "." + std::string(name);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
|
||||
|
||||
/** Start a span with a hash-derived trace_id under a remote parent.

    The span is named "<prefix>.<name>". A remote span context built from
    hashData[0:16] (trace_id), the supplied parent span_id and the supplied
    trace flags is installed as the active span of a fresh context, and
    that context is handed to Telemetry::startSpan().

    @param prefix         Span name prefix.
    @param name           Span name suffix.
    @param hashData       Pointer to at least 16 bytes of hash data.
    @param hashSize       Size of the hash buffer (must be >= 16).
    @param parentSpanId   Pointer to 8 bytes of parent span ID.
    @param parentSpanSize Size of parent span ID buffer (must be 8).
    @param traceFlags     Trace flags from remote context.
    @return A guard owning the started span, or a no-op guard when a
            buffer is null or mis-sized, or when telemetry is unavailable,
            disabled, or not tracing transactions. When the parent
            span_id is all zeros, the result of the hash-only overload
            is returned instead.
*/
SpanGuard
SpanGuard::txSpan(
    std::string_view prefix,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize,
    std::uint8_t const* parentSpanId,
    std::size_t parentSpanSize,
    std::uint8_t traceFlags)
{
    // Both buffers are read below, so they must be present and correctly sized.
    if (hashData == nullptr || hashSize < 16)
        return {};
    if (parentSpanId == nullptr || parentSpanSize != 8)
        return {};

    otel_trace::SpanId parentSpan(
        opentelemetry::nostd::span<std::uint8_t const, 8>(parentSpanId, 8));

    // The parent id comes from a peer. An all-zero id makes the whole span
    // context invalid, so keep the deterministic trace_id by using the
    // hash-only overload rather than passing an invalid parent along.
    // NOTE(review): assumes startSpan() ignores invalid parents -- confirm.
    if (!parentSpan.IsValid())
        return txSpan(prefix, name, hashData, hashSize);

    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    otel_trace::SpanContext combinedCtx(
        traceId, parentSpan, otel_trace::TraceFlags(traceFlags), /* remote = */ true);

    // nostd::shared_ptr takes ownership of the raw DefaultSpan pointer.
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(combinedCtx)));

    auto fullName = std::string(prefix) + "." + std::string(name);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
|
||||
|
||||
// ===== Context capture =====================================================
|
||||
|
||||
SpanContext
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include <xrpld/rpc/MPTokenIssuanceID.h>
|
||||
#include <xrpld/rpc/ServerHandler.h>
|
||||
#include <xrpld/telemetry/TxSpanNames.h>
|
||||
#include <xrpld/telemetry/TxTracing.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/ToString.h>
|
||||
@@ -1314,8 +1315,7 @@ NetworkOPsImp::processTransaction(
|
||||
FailHard failType)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span =
|
||||
SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::process);
|
||||
auto span = txProcessSpan(transaction->getID());
|
||||
span.setAttribute(tx_span::attr::hash, to_string(transaction->getID()).c_str());
|
||||
span.setAttribute(tx_span::attr::local, bLocal);
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <xrpld/peerfinder/PeerfinderManager.h>
|
||||
#include <xrpld/peerfinder/Slot.h>
|
||||
#include <xrpld/telemetry/TxSpanNames.h>
|
||||
#include <xrpld/telemetry/TxTracing.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/SHAMapHash.h>
|
||||
@@ -1423,21 +1424,12 @@ PeerImp::handleTransaction(
|
||||
bool eraseTxQueue,
|
||||
bool batch)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span =
|
||||
SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::receive);
|
||||
span.setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
|
||||
if (auto const version = getVersion(); !version.empty())
|
||||
span.setAttribute(tx_span::attr::peerVersion, version.c_str());
|
||||
|
||||
XRPL_ASSERT(eraseTxQueue != batch, ("xrpl::PeerImp::handleTransaction : valid inputs"));
|
||||
if (tracking_.load() == Tracking::diverged)
|
||||
return;
|
||||
|
||||
if (app_.getOPs().isNeedNetworkLedger())
|
||||
{
|
||||
// If we've never been in synch, there's nothing we can do
|
||||
// with a transaction
|
||||
JLOG(p_journal_.debug()) << "Ignoring incoming transaction: Need network ledger";
|
||||
return;
|
||||
}
|
||||
@@ -1448,7 +1440,13 @@ PeerImp::handleTransaction(
|
||||
{
|
||||
auto stx = std::make_shared<STTx const>(sit);
|
||||
uint256 const txID = stx->getTransactionID();
|
||||
|
||||
using namespace telemetry;
|
||||
auto span = txReceiveSpan(txID, *m);
|
||||
span.setAttribute(tx_span::attr::hash, to_string(txID).c_str());
|
||||
span.setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
|
||||
if (auto const version = getVersion(); !version.empty())
|
||||
span.setAttribute(tx_span::attr::peerVersion, version.c_str());
|
||||
|
||||
// Charge strongly for attempting to relay a txn with tfInnerBatchTxn
|
||||
// LCOV_EXCL_START
|
||||
|
||||
64
src/xrpld/telemetry/TxTracing.h
Normal file
64
src/xrpld/telemetry/TxTracing.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
|
||||
/** Helper functions for creating transaction trace spans.
|
||||
*
|
||||
* Encapsulates the logic for creating SpanGuard instances with
|
||||
* hash-derived trace IDs and optional protobuf parent extraction.
|
||||
* Call sites in PeerImp and NetworkOPs stay simple one-liners.
|
||||
*
|
||||
* When XRPL_ENABLE_TELEMETRY is not defined, the functions return
|
||||
* no-op SpanGuard instances (zero overhead, zero dependencies).
|
||||
*/
|
||||
|
||||
#include <xrpld/telemetry/TxSpanNames.h>
|
||||
|
||||
#include <xrpl/basics/base_uint.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <xrpl/proto/xrpl.pb.h>
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/** Create a "tx.receive" span for a transaction received from a peer.
|
||||
* trace_id is derived from txID[0:16]. If the incoming message carries
|
||||
* a protobuf TraceContext with a valid span_id, it is used as the
|
||||
* parent to preserve relay ordering.
|
||||
*/
|
||||
inline SpanGuard
|
||||
txReceiveSpan(uint256 const& txID, [[maybe_unused]] protocol::TMTransaction const& msg)
|
||||
{
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
if (msg.has_trace_context())
|
||||
{
|
||||
auto const& tc = msg.trace_context();
|
||||
if (tc.has_span_id() && tc.span_id().size() == 8)
|
||||
{
|
||||
return SpanGuard::txSpan(
|
||||
tx_span::prefix::tx,
|
||||
tx_span::op::receive,
|
||||
txID.data(),
|
||||
txID.bytes,
|
||||
reinterpret_cast<std::uint8_t const*>(tc.span_id().data()),
|
||||
tc.span_id().size(),
|
||||
tc.has_trace_flags() ? static_cast<std::uint8_t>(tc.trace_flags())
|
||||
: std::uint8_t{0});
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::receive, txID.data(), txID.bytes);
|
||||
}
|
||||
|
||||
/** Create a "tx.process" span for transaction processing in NetworkOPs.
|
||||
* trace_id is derived from txID[0:16].
|
||||
*/
|
||||
inline SpanGuard
|
||||
txProcessSpan(uint256 const& txID)
|
||||
{
|
||||
return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::process, txID.data(), txID.bytes);
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
Reference in New Issue
Block a user