From a104140a517c6c43617a4ab3bc30a0e6d3e24f92 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 27 May 2026 16:46:35 +0100 Subject: [PATCH 1/4] addressing code review comments Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- docs/build/telemetry.md | 3 +- include/xrpl/protocol/detail/features.macro | 1 - include/xrpl/telemetry/Telemetry.h | 33 +++++++++++---------- src/libxrpl/telemetry/SpanGuard.cpp | 3 +- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/docs/build/telemetry.md b/docs/build/telemetry.md index e8d7fb2dd5..eff1a6848b 100644 --- a/docs/build/telemetry.md +++ b/docs/build/telemetry.md @@ -36,7 +36,8 @@ such as Grafana Tempo. Telemetry is **off by default** at both compile time and runtime: - **Compile time**: The Conan option `telemetry` and CMake option `telemetry` must be set to `True`/`ON`. - When disabled, all tracing macros compile to `((void)0)` with zero overhead. + When disabled, all `SpanGuard` calls compile to inline no-ops (defined in `SpanGuard.h`) + with zero overhead — no OTel SDK dependency required. - **Runtime**: The `[telemetry]` config section must set `enabled=1`. When disabled at runtime, a no-op implementation is used. diff --git a/include/xrpl/protocol/detail/features.macro b/include/xrpl/protocol/detail/features.macro index bad43dd6ed..494b3fa6cd 100644 --- a/include/xrpl/protocol/detail/features.macro +++ b/include/xrpl/protocol/detail/features.macro @@ -15,7 +15,6 @@ // Add new amendments to the top of this list. // Keep it sorted in reverse chronological order. -XRPL_FIX (Cleanup3_2_0, Supported::no, VoteBehavior::DefaultNo) XRPL_FEATURE(MPTokensV2, Supported::no, VoteBehavior::DefaultNo) XRPL_FIX (Security3_1_3, Supported::no, VoteBehavior::DefaultNo) XRPL_FIX (PermissionedDomainInvariant, Supported::yes, VoteBehavior::DefaultNo) diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index 1d69e01a43..ae9303a1db 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -35,32 +35,33 @@ Usage examples: - 1. Check before tracing (typical guard pattern): + 1. Root span at a subsystem entry point (typical usage): @code - auto& telemetry = registry.getTelemetry(); - if (telemetry.isEnabled() && telemetry.shouldTraceRpc()) - { - auto span = telemetry.startSpan("rpc.command.server_info"); - // ... do work, span ends when shared_ptr refcount drops to 0 - } + // In an RPC handler dispatch: + auto guard = SpanGuard::span(TraceCategory::Rpc, "rpc", commandName); + guard.setAttribute("xrpl.rpc.command", commandName); + // ... process request + // guard destructor automatically ends the span on scope exit @endcode - 2. RAII tracing with SpanGuard (preferred): + 2. Child span for a sub-operation (cross-scope): @code - if (telemetry.isEnabled() && telemetry.shouldTraceRpc()) + auto parent = SpanGuard::span(TraceCategory::Transactions, "tx", "process"); { - SpanGuard guard(telemetry.startSpan("rpc.command.submit")); - guard.setAttribute("xrpl.rpc.command", "submit"); - // ... guard ends span automatically on scope exit + auto child = parent.childSpan("tx.apply"); + child.setAttribute("xrpl.tx.type", txType); + // child ends here } + // parent continues, then ends here @endcode 3. Cross-thread context propagation: @code - // On thread A: capture context - auto ctx = guard.context(); - // On thread B: create child span with explicit parent - auto child = telemetry.startSpan("async.work", ctx); + // Thread A: capture the active context while span is in scope + auto ctx = parentGuard.captureContext(); + + // Thread B: create child span with explicit parent + auto child = SpanGuard::childSpan("async.work", ctx); @endcode @note Thread safety: The Telemetry interface is safe for concurrent reads diff --git a/src/libxrpl/telemetry/SpanGuard.cpp b/src/libxrpl/telemetry/SpanGuard.cpp index 11eda56ed6..90bb139b97 100644 --- a/src/libxrpl/telemetry/SpanGuard.cpp +++ b/src/libxrpl/telemetry/SpanGuard.cpp @@ -35,6 +35,7 @@ #include #include +#include #include namespace xrpl { @@ -327,7 +328,7 @@ SpanGuard::recordException(std::exception const& e) return; impl_->span->AddEvent( "exception", - {{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}}); + {{"exception.type", typeid(e).name()}, {"exception.message", std::string(e.what())}}); impl_->span->SetStatus(otel_trace::StatusCode::kError, e.what()); } From 6aa8570d6ce52cb6d15cbe346fc6a7e1a0d585e1 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 27 May 2026 17:36:06 +0100 Subject: [PATCH 2/4] addressed code review comments. Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- include/xrpl/telemetry/SpanNames.h | 1 + include/xrpl/telemetry/Telemetry.h | 35 ++++++++++++++------------ src/xrpld/app/main/GRPCServer.cpp | 8 ++++++ src/xrpld/app/main/GrpcSpanNames.h | 2 ++ src/xrpld/rpc/detail/RPCHandler.cpp | 7 +++++- src/xrpld/rpc/detail/ServerHandler.cpp | 3 +++ 6 files changed, 39 insertions(+), 17 deletions(-) diff --git a/include/xrpl/telemetry/SpanNames.h b/include/xrpl/telemetry/SpanNames.h index 3c8b6144f9..9fdc37cd64 100644 --- a/include/xrpl/telemetry/SpanNames.h +++ b/include/xrpl/telemetry/SpanNames.h @@ -20,6 +20,7 @@ * - Per-span attribute keys: bare field name (span name carries the domain). * - Collision qualifier: _ when bare name collides across * domains or with OTel reserved `status` (e.g. rpc_status, grpc_status). + * - Shared cross-span attributes: _ (underscore) form. * - Resource attribute keys: xrpl.. (process-identity). * - Span prefixes: [.]. */ diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index f648289569..0cb217bd29 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -3,13 +3,15 @@ /** Abstract interface for OpenTelemetry distributed tracing. Provides the Telemetry base class that all components use to create trace - spans. Two concrete implementations exist, selected at construction time + spans. Three concrete implementations exist, selected at construction time by make_Telemetry(): - TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime. - NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is disabled at compile time or runtime. + - NullTelemetryOtel (Telemetry.cpp): no-op stub that still depends on + the OTel API (used during transition or for testing). Inheritance / dependency diagram: @@ -37,32 +39,33 @@ 1. Root span at a subsystem entry point (typical usage): @code + #include + using namespace xrpl::telemetry; + // In an RPC handler dispatch: - auto guard = SpanGuard::span(TraceCategory::Rpc, "rpc", commandName); - guard.setAttribute("xrpl.rpc.command", commandName); + auto guard = SpanGuard::span( + TraceCategory::Rpc, rpc_span::prefix::command, commandName); + guard.setAttribute(rpc_span::attr::command, commandName); // ... process request // guard destructor automatically ends the span on scope exit @endcode - 2. Child span for a sub-operation (cross-scope): - @code - auto parent = SpanGuard::span(TraceCategory::Transactions, "tx", "process"); - { - SpanGuard guard(telemetry.startSpan("rpc.command.submit")); - guard.setAttribute("command", "submit"); - // ... guard ends span automatically on scope exit - } - @endcode - - 3. Child span for a sub-operation (cross-scope): + 2. Child span for a sub-operation (scoped child): @code auto parent = SpanGuard::span(TraceCategory::Transactions, "tx", "process"); { auto child = parent.childSpan("tx.apply"); - child.setAttribute("xrpl.tx.type", txType); + child.setAttribute("tx_type", txType); // child ends here } - // parent continues, then ends here + @endcode + + 3. Unrelated span (cross-scope, same thread): + @code + // Transactions and RPC can be active simultaneously + auto txSpan = SpanGuard::span(TraceCategory::Transactions, "tx", "process"); + auto rpcSpan = SpanGuard::span(TraceCategory::Rpc, "rpc", "info"); + // both spans end on scope exit @endcode 4. Cross-thread context propagation: diff --git a/src/xrpld/app/main/GRPCServer.cpp b/src/xrpld/app/main/GRPCServer.cpp index eeb4797b19..92760d641f 100644 --- a/src/xrpld/app/main/GRPCServer.cpp +++ b/src/xrpld/app/main/GRPCServer.cpp @@ -179,6 +179,7 @@ GRPCServerImpl::CallData::process(std::shared_ptr::process(std::shared_ptrerror, ok->success. + span.setAttribute( + rpc_span::attr::rpcStatus, ret ? rpc_span::val::error : rpc_span::val::success); + if (!ret) + span.setOk(); return ret; } catch (std::exception& e) diff --git a/src/xrpld/rpc/detail/ServerHandler.cpp b/src/xrpld/rpc/detail/ServerHandler.cpp index a181ec56cd..4f1918076c 100644 --- a/src/xrpld/rpc/detail/ServerHandler.cpp +++ b/src/xrpld/rpc/detail/ServerHandler.cpp @@ -564,6 +564,7 @@ ServerHandler::processSession( jr[jss::api_version] = jv[jss::api_version]; jr[jss::type] = jss::response; + span.setOk(); return jr; } @@ -598,6 +599,7 @@ ServerHandler::processSession( { session->close(true); } + span.setOk(); } static Json::Value @@ -1036,6 +1038,7 @@ ServerHandler::processRequest( } } + span.setOk(); HTTPReply(httpStatus, response, output, rpcJ); } From f6f0cb1a5fd978cf074971a5456af08d243d0eff Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 27 May 2026 17:55:09 +0100 Subject: [PATCH 3/4] updated class comment Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- include/xrpl/telemetry/SpanNames.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/xrpl/telemetry/SpanNames.h b/include/xrpl/telemetry/SpanNames.h index 9fdc37cd64..fe12e3bdc5 100644 --- a/include/xrpl/telemetry/SpanNames.h +++ b/include/xrpl/telemetry/SpanNames.h @@ -20,8 +20,13 @@ * - Per-span attribute keys: bare field name (span name carries the domain). * - Collision qualifier: _ when bare name collides across * domains or with OTel reserved `status` (e.g. rpc_status, grpc_status). - * - Shared cross-span attributes: _ (underscore) form. - * - Resource attribute keys: xrpl.. (process-identity). + * - Shared cross-span attributes: _ (underscore) form + * (e.g. tx_hash, peer_id, ledger_seq, consensus_round). + * - Resource attribute keys: xrpl.. (dotted) form is + * RESERVED for process-identity attributes set once at startup on the + * OTel resource (e.g. xrpl.network.id, xrpl.network.type). Do not use + * this form for span attributes — it parses awkwardly in TraceQL and + * blurs the resource/span scope distinction. * - Span prefixes: [.]. */ From bfdcd3da87c5b629dd720ed3b1ebb256dad69e0e Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 27 May 2026 18:26:29 +0100 Subject: [PATCH 4/4] fix(telemetry): use resolved command/method name as span suffix Per PR #6438 review thread r3250432621: known-command errors (rpcTOO_BUSY, rpcNO_PERMISSION, etc.) were collapsing into a single rpc.command.unknown span name, hiding per-command error rates in dashboards. Same anti-pattern existed for gRPC, where every method was bucketed under grpc.request with the method relegated to an attribute. - RPCHandler.cpp: doCommand error path uses cmdName as the span suffix; the rpc_span::val::unknownCommand fallback only applies when the request truly omits both command and method fields. - GRPCServer.cpp: gRPC span name is now grpc. (e.g. grpc.GetLedger). Method also retained as an attribute. - GrpcSpanNames.h: drop the unused op::request constant; update the span-hierarchy comment. - RpcSpanNames.h: update the gRPC span diagram to match. Dashboards on downstream phases will benefit from per-command breakdowns without needing TraceQL attribute filters. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/xrpld/app/main/GRPCServer.cpp | 3 +-- src/xrpld/app/main/GrpcSpanNames.h | 14 ++++++-------- src/xrpld/rpc/detail/RPCHandler.cpp | 7 +++++-- src/xrpld/rpc/detail/RpcSpanNames.h | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/xrpld/app/main/GRPCServer.cpp b/src/xrpld/app/main/GRPCServer.cpp index 92760d641f..d0019b79f9 100644 --- a/src/xrpld/app/main/GRPCServer.cpp +++ b/src/xrpld/app/main/GRPCServer.cpp @@ -169,8 +169,7 @@ void GRPCServerImpl::CallData::process(std::shared_ptr coro) { using namespace telemetry; - auto span = - SpanGuard::span(TraceCategory::Rpc, grpc_span::prefix::grpc, grpc_span::op::request); + auto span = SpanGuard::span(TraceCategory::Rpc, grpc_span::prefix::grpc, name_); span.setAttribute(grpc_span::attr::method, name_); try diff --git a/src/xrpld/app/main/GrpcSpanNames.h b/src/xrpld/app/main/GrpcSpanNames.h index c2f40ee53d..2532b3812d 100644 --- a/src/xrpld/app/main/GrpcSpanNames.h +++ b/src/xrpld/app/main/GrpcSpanNames.h @@ -9,13 +9,16 @@ * Span hierarchy: * * +-------------------------------------------------------+ - * | grpc.request | + * | grpc. (e.g. grpc.GetLedger) | * | CallData::process(coro) | * | attrs: method, grpc_role, grpc_status | * +-------------------------------------------------------+ * * Unlike the HTTP/WS RPC path, gRPC has a flat single-span structure * per request since each CallData handles exactly one RPC method. + * The method name is embedded in the span name (rather than only as + * an attribute) so dashboards can break out per-method latency and + * error rates without needing TraceQL attribute filters. */ #include @@ -25,16 +28,11 @@ namespace xrpl::telemetry::grpc_span { // ===== Span prefixes ======================================================= namespace prefix { -/// "grpc" — root prefix for gRPC transport spans. +/// "grpc" — root prefix for gRPC transport spans. The full span name is +/// formed at the call site as `grpc.` (see GRPCServer.cpp). inline constexpr auto grpc = makeStr("grpc"); } // namespace prefix -// ===== Span operation suffixes ============================================= - -namespace op { -inline constexpr auto request = makeStr("request"); -} // namespace op - // ===== Attribute keys ====================================================== namespace attr { diff --git a/src/xrpld/rpc/detail/RPCHandler.cpp b/src/xrpld/rpc/detail/RPCHandler.cpp index e8b6e6d26c..1eca97cd52 100644 --- a/src/xrpld/rpc/detail/RPCHandler.cpp +++ b/src/xrpld/rpc/detail/RPCHandler.cpp @@ -229,8 +229,11 @@ doCommand(RPC::JsonContext& context, Json::Value& result) { cmdName = "unknown"; } - auto span = SpanGuard::span( - TraceCategory::Rpc, rpc_span::prefix::command, rpc_span::val::unknownCommand); + // Use the resolved command name as the span suffix so dashboards + // can break out per-command error rates (e.g. rpc.command.submit + // for a submit that hit rpcTOO_BUSY). Falling back to a single + // "unknown" name only when the request truly omits both fields. + auto span = SpanGuard::span(TraceCategory::Rpc, rpc_span::prefix::command, cmdName); span.setAttribute(rpc_span::attr::command, cmdName.c_str()); span.setError(get_error_info(error).token.c_str()); diff --git a/src/xrpld/rpc/detail/RpcSpanNames.h b/src/xrpld/rpc/detail/RpcSpanNames.h index c99c153f11..62eba3de71 100644 --- a/src/xrpld/rpc/detail/RpcSpanNames.h +++ b/src/xrpld/rpc/detail/RpcSpanNames.h @@ -86,9 +86,9 @@ * gRPC path (see GrpcSpanNames.h for constants): * * +-------------------------------------------------------+ - * | grpc.request | + * | grpc. (e.g. grpc.GetLedger) | * | CallData::process(coro) | - * | attrs: method, grpc_status | + * | attrs: method, grpc_status | * +-------------------------------------------------------+ * * Covered paths: