mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-03 00:36:48 +00:00
feat(telemetry): add hash-derived trace IDs for transaction spans
Derive trace_id from txHash[0:16] so all nodes handling the same transaction produce spans under the same trace. Protobuf span_id propagation provides parent-child relay ordering when available.

- Add SpanGuard::txSpan() factory methods (hash-derived trace ID)
- Add TxTracing.h helpers: txReceiveSpan(), txProcessSpan()
- Update PeerImp and NetworkOPs to use the new helpers

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -29,12 +29,17 @@
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/context.h>
|
||||
#include <opentelemetry/trace/default_span.h>
|
||||
#include <opentelemetry/trace/provider.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
#include <opentelemetry/trace/span_context.h>
|
||||
#include <opentelemetry/trace/span_startoptions.h>
|
||||
#include <opentelemetry/trace/trace_flags.h>
|
||||
#include <opentelemetry/trace/trace_id.h>
|
||||
#include <opentelemetry/trace/tracer.h>
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
@@ -227,6 +232,74 @@ SpanGuard::linkedSpan(std::string_view name, SpanContext const& linkCtx)
|
||||
opts)));
|
||||
}
|
||||
|
||||
// ===== Transaction span with hash-derived trace ID ========================
|
||||
|
||||
/** Start a transaction span whose trace ID is taken from the transaction hash.
 *
 * The first 16 bytes of the hash are used verbatim as the trace ID. A locally
 * generated random span ID is paired with it to form a synthetic parent
 * context, and the new span is started as a child of that context.
 *
 * @param prefix    First component of the span name.
 * @param name      Second component of the span name; the final name is
 *                  "<prefix>.<name>".
 * @param hashData  Pointer to the hash bytes. Only the first 16 are read.
 * @param hashSize  Number of readable bytes at hashData.
 * @return A SpanGuard wrapping the started span, or a default-constructed
 *         SpanGuard when hashData is null, hashSize is below 16, no Telemetry
 *         instance exists, telemetry is disabled, or transaction tracing is
 *         switched off.
 */
SpanGuard
SpanGuard::txSpan(
    std::string_view prefix,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize)
{
    // A null pointer with a plausible size would otherwise be dereferenced
    // when the trace ID is built below.
    if (hashData == nullptr || hashSize < 16)
        return {};
    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    // This runs for every traced transaction. Constructing std::random_device
    // per call (and drawing from it eight times) is comparatively expensive,
    // so keep one engine per thread and seed it from the device only once.
    // Span IDs need to be unique, not cryptographically unpredictable.
    thread_local std::mt19937_64 spanIdEngine{[] {
        std::random_device rd;
        auto const high = static_cast<std::uint64_t>(rd());
        auto const low = static_cast<std::uint64_t>(rd());
        return (high << 32) ^ low;
    }()};

    // An all-zero span ID is the "invalid" value in OpenTelemetry; redraw in
    // the (astronomically unlikely) case the engine produces it.
    std::uint64_t spanIdBits = spanIdEngine();
    while (spanIdBits == 0)
        spanIdBits = spanIdEngine();

    std::uint8_t spanIdBytes[8];
    for (std::size_t i = 0; i < 8; ++i)
        spanIdBytes[i] = static_cast<std::uint8_t>(spanIdBits >> (8 * i));
    otel_trace::SpanId spanId(opentelemetry::nostd::span<std::uint8_t const, 8>(spanIdBytes, 8));

    // TraceFlags(1) sets the "sampled" bit on the synthetic parent.
    otel_trace::SpanContext syntheticCtx(
        traceId, spanId, otel_trace::TraceFlags(1), /* remote = */ false);

    // nostd::shared_ptr takes ownership of the DefaultSpan allocated here.
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(syntheticCtx)));

    auto fullName = std::string(prefix) + "." + std::string(name);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
|
||||
|
||||
/** Start a transaction span with a hash-derived trace ID and a remote parent.
 *
 * The first 16 bytes of the hash are used verbatim as the trace ID, and the
 * supplied 8-byte span ID is used as the (remote) parent of the new span.
 *
 * @param prefix          First component of the span name.
 * @param name            Second component of the span name; the final name is
 *                        "<prefix>.<name>".
 * @param hashData        Pointer to the hash bytes. Only the first 16 are read.
 * @param hashSize        Number of readable bytes at hashData.
 * @param parentSpanId    Pointer to the parent span ID bytes.
 * @param parentSpanSize  Number of bytes at parentSpanId; must be exactly 8.
 * @param traceFlags      Trace flags to attach to the parent context.
 * @return A SpanGuard wrapping the started span, or a default-constructed
 *         SpanGuard when either pointer is null, hashSize is below 16,
 *         parentSpanSize is not 8, no Telemetry instance exists, telemetry is
 *         disabled, or transaction tracing is switched off. When the parent
 *         span ID is all zeros the result is whatever the four-argument
 *         overload returns.
 */
SpanGuard
SpanGuard::txSpan(
    std::string_view prefix,
    std::string_view name,
    std::uint8_t const* hashData,
    std::size_t hashSize,
    std::uint8_t const* parentSpanId,
    std::size_t parentSpanSize,
    std::uint8_t traceFlags)
{
    // Null pointers with plausible sizes would otherwise be dereferenced when
    // the IDs are built below.
    if (hashData == nullptr || hashSize < 16)
        return {};
    if (parentSpanId == nullptr || parentSpanSize != 8)
        return {};

    otel_trace::SpanId parentSpan(
        opentelemetry::nostd::span<std::uint8_t const, 8>(parentSpanId, 8));

    // The parent span ID may come from another node. An all-zero ID is the
    // OpenTelemetry "invalid" value and makes the whole parent SpanContext
    // invalid, in which case the span would not be expected to inherit the
    // hash-derived trace ID (NOTE(review): confirm against
    // Telemetry::startSpan). Fall back to the overload that synthesizes a
    // parent so the trace ID still comes from the hash.
    if (!parentSpan.IsValid())
        return SpanGuard::txSpan(prefix, name, hashData, hashSize);

    auto* tel = Telemetry::getInstance();
    if (!tel || !tel->isEnabled() || !tel->shouldTraceTransactions())
        return {};

    otel_trace::TraceId traceId(opentelemetry::nostd::span<std::uint8_t const, 16>(hashData, 16));

    otel_trace::SpanContext combinedCtx(
        traceId, parentSpan, otel_trace::TraceFlags(traceFlags), /* remote = */ true);

    // nostd::shared_ptr takes ownership of the DefaultSpan allocated here.
    auto parentCtx = opentelemetry::context::Context{}.SetValue(
        otel_trace::kSpanKey,
        opentelemetry::nostd::shared_ptr<otel_trace::Span>(
            new otel_trace::DefaultSpan(combinedCtx)));

    auto fullName = std::string(prefix) + "." + std::string(name);
    return SpanGuard(std::make_unique<Impl>(tel->startSpan(fullName, parentCtx)));
}
|
||||
|
||||
// ===== Context capture =====================================================
|
||||
|
||||
SpanContext
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include <xrpld/rpc/MPTokenIssuanceID.h>
|
||||
#include <xrpld/rpc/ServerHandler.h>
|
||||
#include <xrpld/telemetry/TxSpanNames.h>
|
||||
#include <xrpld/telemetry/TxTracing.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/ToString.h>
|
||||
@@ -1314,8 +1315,7 @@ NetworkOPsImp::processTransaction(
|
||||
FailHard failType)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span =
|
||||
SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::process);
|
||||
auto span = txProcessSpan(transaction->getID());
|
||||
span.setAttribute(tx_span::attr::hash, to_string(transaction->getID()).c_str());
|
||||
span.setAttribute(tx_span::attr::local, bLocal);
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <xrpld/peerfinder/PeerfinderManager.h>
|
||||
#include <xrpld/peerfinder/Slot.h>
|
||||
#include <xrpld/telemetry/TxSpanNames.h>
|
||||
#include <xrpld/telemetry/TxTracing.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/SHAMapHash.h>
|
||||
@@ -1423,21 +1424,12 @@ PeerImp::handleTransaction(
|
||||
bool eraseTxQueue,
|
||||
bool batch)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span =
|
||||
SpanGuard::span(TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::receive);
|
||||
span.setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
|
||||
if (auto const version = getVersion(); !version.empty())
|
||||
span.setAttribute(tx_span::attr::peerVersion, version.c_str());
|
||||
|
||||
XRPL_ASSERT(eraseTxQueue != batch, ("xrpl::PeerImp::handleTransaction : valid inputs"));
|
||||
if (tracking_.load() == Tracking::diverged)
|
||||
return;
|
||||
|
||||
if (app_.getOPs().isNeedNetworkLedger())
|
||||
{
|
||||
// If we've never been in synch, there's nothing we can do
|
||||
// with a transaction
|
||||
JLOG(p_journal_.debug()) << "Ignoring incoming transaction: Need network ledger";
|
||||
return;
|
||||
}
|
||||
@@ -1448,7 +1440,13 @@ PeerImp::handleTransaction(
|
||||
{
|
||||
auto stx = std::make_shared<STTx const>(sit);
|
||||
uint256 const txID = stx->getTransactionID();
|
||||
|
||||
using namespace telemetry;
|
||||
auto span = txReceiveSpan(txID, *m);
|
||||
span.setAttribute(tx_span::attr::hash, to_string(txID).c_str());
|
||||
span.setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
|
||||
if (auto const version = getVersion(); !version.empty())
|
||||
span.setAttribute(tx_span::attr::peerVersion, version.c_str());
|
||||
|
||||
// Charge strongly for attempting to relay a txn with tfInnerBatchTxn
|
||||
// LCOV_EXCL_START
|
||||
|
||||
64
src/xrpld/telemetry/TxTracing.h
Normal file
64
src/xrpld/telemetry/TxTracing.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
|
||||
/** Helper functions for creating transaction trace spans.
|
||||
*
|
||||
* Encapsulates the logic for creating SpanGuard instances with
|
||||
* hash-derived trace IDs and optional protobuf parent extraction.
|
||||
* Call sites in PeerImp and NetworkOPs stay simple one-liners.
|
||||
*
|
||||
* When XRPL_ENABLE_TELEMETRY is not defined, the functions return
|
||||
* no-op SpanGuard instances (zero overhead, zero dependencies).
|
||||
*/
|
||||
|
||||
#include <xrpld/telemetry/TxSpanNames.h>
|
||||
|
||||
#include <xrpl/basics/base_uint.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <xrpl/proto/xrpl.pb.h>
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/** Create a "tx.receive" span for a transaction received from a peer.
|
||||
* trace_id is derived from txID[0:16]. If the incoming message carries
|
||||
* a protobuf TraceContext with a valid span_id, it is used as the
|
||||
* parent to preserve relay ordering.
|
||||
*/
|
||||
inline SpanGuard
|
||||
txReceiveSpan(uint256 const& txID, [[maybe_unused]] protocol::TMTransaction const& msg)
|
||||
{
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
if (msg.has_trace_context())
|
||||
{
|
||||
auto const& tc = msg.trace_context();
|
||||
if (tc.has_span_id() && tc.span_id().size() == 8)
|
||||
{
|
||||
return SpanGuard::txSpan(
|
||||
tx_span::prefix::tx,
|
||||
tx_span::op::receive,
|
||||
txID.data(),
|
||||
txID.bytes,
|
||||
reinterpret_cast<std::uint8_t const*>(tc.span_id().data()),
|
||||
tc.span_id().size(),
|
||||
tc.has_trace_flags() ? static_cast<std::uint8_t>(tc.trace_flags())
|
||||
: std::uint8_t{0});
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::receive, txID.data(), txID.bytes);
|
||||
}
|
||||
|
||||
/** Create a "tx.process" span for transaction processing in NetworkOPs.
|
||||
* trace_id is derived from txID[0:16].
|
||||
*/
|
||||
inline SpanGuard
|
||||
txProcessSpan(uint256 const& txID)
|
||||
{
|
||||
return SpanGuard::txSpan(tx_span::prefix::tx, tx_span::op::process, txID.data(), txID.bytes);
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
Reference in New Issue
Block a user