Merge branch 'pratik/otel-phase3-tx-tracing' into pratik/otel-phase4-consensus-tracing

This commit is contained in:
Pratik Mankawde
2026-06-08 16:08:27 +01:00
3 changed files with 101 additions and 3 deletions

View File

@@ -17,6 +17,7 @@
#ifdef XRPL_ENABLE_TELEMETRY
#include <xrpl/proto/xrpl.pb.h>
#include <xrpl/telemetry/TraceContextValidation.h>
#include <opentelemetry/context/context.h>
#include <opentelemetry/trace/context.h>
@@ -40,8 +41,9 @@ extractFromProtobuf(protocol::TraceContext const& proto)
{
namespace trace = opentelemetry::trace;
if (!proto.has_trace_id() || proto.trace_id().size() != 16 || !proto.has_span_id() ||
proto.span_id().size() != 8)
// Reject malformed or all-zero ids from the peer before trusting
// them as a parent. See TraceContextValidation.h.
if (!isValidTraceContext(proto))
{
return opentelemetry::context::Context{};
}

View File

@@ -0,0 +1,93 @@
#pragma once
/** Validation predicates for peer-supplied trace context.
*
* A protobuf TraceContext arrives inside untrusted peer messages
* (TMTransaction, TMProposeSet, TMValidation). Before a receiving node
* builds a span from it, the context must be checked: a peer can send a
* malformed or all-zero trace_id/span_id, either by accident or to
* pollute traces. This header is the single place those checks live, so
* every receive site agrees on what "valid" means.
*
* Validity follows the OpenTelemetry spec: a trace_id is 16 bytes, a
* span_id is 8 bytes, and an all-zero id is invalid and must not be
* used as a parent.
*
* Dependency diagram:
*
* protocol::TraceContext (untrusted, from peer)
* |
* v
* isValidTraceContext() / isValidSpanId() / isValidTraceId()
* |
* +--- valid ----> build child span from peer context
* |
* +--- invalid --> ignore peer context, start fresh span
*
* This header depends only on the generated protobuf type. It pulls in
* no OpenTelemetry headers, so receive sites that use SpanGuard keep
* SpanGuard's encapsulation of OTel types.
*
* @note Thread-safe: all functions are pure and operate only on their
* arguments. No shared state.
*
* Usage:
* @code
* // Full parent context (both ids come from the peer):
* if (isValidTraceContext(msg.trace_context()))
* buildChildSpan(...);
*
* // Span-only (trace_id is derived locally, not from the peer):
* if (tc.has_span_id() && isValidSpanId(tc.span_id()))
* buildChildSpan(...);
* @endcode
*/
#include <xrpl/proto/xrpl.pb.h>
#include <algorithm>
#include <ranges>
#include <string>
namespace xrpl::telemetry {
/** True if the bytes are a valid OTel trace_id: 16 bytes, not all-zero.
*
* @param traceId The raw trace_id bytes from a protobuf TraceContext.
* @return true if usable as a trace identifier, false otherwise.
*/
inline bool
isValidTraceId(std::string const& traceId)
{
return traceId.size() == 16 && std::ranges::any_of(traceId, [](char c) { return c != 0; });
}
/** True if the bytes are a valid OTel span_id: 8 bytes, not all-zero.
*
* @param spanId The raw span_id bytes from a protobuf TraceContext.
* @return true if usable as a span identifier, false otherwise.
*/
inline bool
isValidSpanId(std::string const& spanId)
{
return spanId.size() == 8 && std::ranges::any_of(spanId, [](char c) { return c != 0; });
}
/** True if the context carries a usable parent: a valid trace_id and a
* valid span_id together.
*
* Use this where both ids are taken from the peer (consensus receive,
* generic extraction). The transaction path derives its trace_id
* locally from the txID, so it checks isValidSpanId() alone instead.
*
* @param tc The protobuf TraceContext received from a peer.
* @return true if both ids are present and valid, false otherwise.
*/
inline bool
isValidTraceContext(protocol::TraceContext const& tc)
{
return tc.has_trace_id() && isValidTraceId(tc.trace_id()) && tc.has_span_id() &&
isValidSpanId(tc.span_id());
}
} // namespace xrpl::telemetry

View File

@@ -15,6 +15,7 @@
#include <xrpl/basics/base_uint.h>
#include <xrpl/proto/xrpl.pb.h>
#include <xrpl/telemetry/SpanGuard.h>
#include <xrpl/telemetry/TraceContextValidation.h>
namespace xrpl::telemetry {
@@ -30,7 +31,9 @@ txReceiveSpan(uint256 const& txID, [[maybe_unused]] protocol::TMTransaction cons
if (msg.has_trace_context())
{
auto const& tc = msg.trace_context();
if (tc.has_span_id() && tc.span_id().size() == 8)
// Only the span_id is taken from the peer here; the trace_id is
// derived locally from txID, so validate the span_id alone.
if (tc.has_span_id() && isValidSpanId(tc.span_id()))
{
return SpanGuard::hashSpan(
TraceCategory::Transactions,