From e11bf35691ca31aa1a8a8e52631d26374339fe5e Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 10 Jun 2026 10:42:52 +0100 Subject: [PATCH 1/6] fix: Update BasicConfig.h include path after upstream reorg BasicConfig.h moved from xrpl/basics/ to xrpl/config/ on develop (PR #7095 / reorg). Phase 1b's telemetry headers still referenced the old path, breaking a fresh compile with "BasicConfig.h: No such file or directory". Point both telemetry includes at the new location. Co-Authored-By: Claude Opus 4.8 --- include/xrpl/telemetry/Telemetry.h | 2 +- src/libxrpl/telemetry/TelemetryConfig.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index fe7ef33bf6..f1144da676 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -70,8 +70,8 @@ The OTel SDK's TracerProvider and Tracer are internally thread-safe. */ -#include #include +#include #include #include diff --git a/src/libxrpl/telemetry/TelemetryConfig.cpp b/src/libxrpl/telemetry/TelemetryConfig.cpp index 0863623f66..98c6dbc8c1 100644 --- a/src/libxrpl/telemetry/TelemetryConfig.cpp +++ b/src/libxrpl/telemetry/TelemetryConfig.cpp @@ -7,7 +7,7 @@ See cfg/xrpld-example.cfg for the full list of available options. */ -#include +#include #include #include From 38fbab1d186d265d61cf29e478400e3300e1145e Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 10 Jun 2026 10:50:54 +0100 Subject: [PATCH 2/6] levelization Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- .github/scripts/levelization/results/ordering.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt index 644210f70e..c7b11a8855 100644 --- a/.github/scripts/levelization/results/ordering.txt +++ b/.github/scripts/levelization/results/ordering.txt @@ -47,6 +47,7 @@ libxrpl.shamap > xrpl.nodestore libxrpl.shamap > xrpl.protocol libxrpl.shamap > xrpl.shamap libxrpl.telemetry > xrpl.basics +libxrpl.telemetry > xrpl.config libxrpl.telemetry > xrpl.telemetry libxrpl.tx > xrpl.basics libxrpl.tx > xrpl.conditions @@ -250,7 +251,7 @@ xrpl.server > xrpl.shamap xrpl.shamap > xrpl.basics xrpl.shamap > xrpl.nodestore xrpl.shamap > xrpl.protocol -xrpl.telemetry > xrpl.basics +xrpl.telemetry > xrpl.config xrpl.telemetry > xrpld.consensus xrpl.telemetry > xrpld.rpc xrpl.tx > xrpl.basics From e205d0ef8ec113b354c4d1828284bda3bfb2e711 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:00:52 +0100 Subject: [PATCH 3/6] handle gTlDiscardCurrentSpan change Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- cmake/XrplCore.cmake | 2 +- include/xrpl/telemetry/Telemetry.h | 6 +++--- src/libxrpl/telemetry/SpanGuard.cpp | 8 ++++++++ src/libxrpl/telemetry/Telemetry.cpp | 3 +-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/cmake/XrplCore.cmake b/cmake/XrplCore.cmake index c4c9c180a1..c5748016c9 100644 --- a/cmake/XrplCore.cmake +++ b/cmake/XrplCore.cmake @@ -209,7 +209,7 @@ target_link_libraries(xrpl.libxrpl.tx PUBLIC xrpl.libxrpl.ledger) add_module(xrpl telemetry) target_link_libraries( xrpl.libxrpl.telemetry - PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast + PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast xrpl.libxrpl.config ) if(telemetry) target_link_libraries( diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index f1144da676..fdc118ed95 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -135,11 +135,11 @@ public: std::string serviceName = "xrpld"; /** OTel resource attribute `service.version` (set from BuildInfo). */ - std::string serviceVersion{}; + std::string serviceVersion; /** OTel resource attribute `service.instance.id` (defaults to node public key). */ - std::string serviceInstanceId{}; + std::string serviceInstanceId; /** OTLP/HTTP endpoint URL where spans are sent. */ std::string exporterEndpoint = "http://localhost:4318/v1/traces"; @@ -148,7 +148,7 @@ public: bool useTls = false; /** Path to a CA certificate bundle for TLS verification. */ - std::string tlsCertPath{}; + std::string tlsCertPath; /** Head-based sampling ratio. Intentionally fixed at 1.0 (sample everything) and NOT read from config. A per-node ratio would let diff --git a/src/libxrpl/telemetry/SpanGuard.cpp b/src/libxrpl/telemetry/SpanGuard.cpp index 1e7024150b..87739ae7b8 100644 --- a/src/libxrpl/telemetry/SpanGuard.cpp +++ b/src/libxrpl/telemetry/SpanGuard.cpp @@ -344,6 +344,14 @@ SpanGuard::discard() { gTlDiscardCurrentSpan = true; impl_->span->End(); + // Clear here so discard() owns the flag's whole lifetime + // (set -> End -> clear) in one scope, rather than relying on + // FilteringSpanProcessor::OnEnd() to clear it. Today every valid guard + // wraps a recording span (head sampling is 1.0), so OnEnd() always runs + // and clearing here is equivalent — but colocating set and clear keeps + // the flag leak-proof if a later phase can hand back a non-recording + // span (e.g. honoring a non-sampled remote parent during propagation). + gTlDiscardCurrentSpan = false; impl_->span = nullptr; // prevent ~Impl from calling End() again impl_.reset(); } diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp index c4425bda84..9e55a40f4e 100644 --- a/src/libxrpl/telemetry/Telemetry.cpp +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -122,8 +122,7 @@ public: { // SpanGuard::discard() set the flag on this thread just before // calling Span::End(), which invokes OnEnd() synchronously. - // Clear the flag and drop the span. - gTlDiscardCurrentSpan = false; + // Drop the span. return; } delegate_->OnEnd(std::move(span)); From 34d7280df4972aff59d8e5997722e261b47ab643 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 10 Jun 2026 15:47:43 +0100 Subject: [PATCH 4/6] addressed PR comments Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- src/xrpld/rpc/ServerHandler.h | 6 +- src/xrpld/rpc/detail/RPCHandler.cpp | 12 ++- src/xrpld/rpc/detail/ServerHandler.cpp | 106 +++++++++++++++++-------- 3 files changed, 91 insertions(+), 33 deletions(-) diff --git a/src/xrpld/rpc/ServerHandler.h b/src/xrpld/rpc/ServerHandler.h index 0aac05ea44..b1ecf71f82 100644 --- a/src/xrpld/rpc/ServerHandler.h +++ b/src/xrpld/rpc/ServerHandler.h @@ -177,7 +177,11 @@ private: void processSession(std::shared_ptr const&, std::shared_ptr coro); - void + /** Process an RPC request and write the reply to `output`. + @return false if the request resulted in an error response, true + otherwise. Lets the caller's enclosing span reflect the outcome. + */ + bool processRequest( Port const& port, std::string const& request, diff --git a/src/xrpld/rpc/detail/RPCHandler.cpp b/src/xrpld/rpc/detail/RPCHandler.cpp index 5fbc71bf4c..04358eeeb6 100644 --- a/src/xrpld/rpc/detail/RPCHandler.cpp +++ b/src/xrpld/rpc/detail/RPCHandler.cpp @@ -191,8 +191,17 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object& rpc_span::attr::rpcStatus, ret ? std::string_view{rpc_span::val::error} : std::string_view{rpc_span::val::success}); - if (!ret) + // Reflect the result in the OTel span status, not just the attribute, + // so non-exception RPC errors (rpcTOO_BUSY, rpcNO_PERMISSION, ...) are + // visible to {status.code=error} queries. + if (ret) + { + span.setError(rpc_span::val::error); + } + else + { span.setOk(); + } return ret; } catch (std::exception& e) @@ -237,6 +246,7 @@ doCommand(RPC::JsonContext& context, json::Value& result) // "unknown" name only when the request truly omits both fields. auto span = SpanGuard::span(TraceCategory::Rpc, rpc_span::prefix::command, cmdName); span.setAttribute(rpc_span::attr::command, cmdName.c_str()); + span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); span.setError(getErrorInfo(error).token.cStr()); injectError(error, result); diff --git a/src/xrpld/rpc/detail/ServerHandler.cpp b/src/xrpld/rpc/detail/ServerHandler.cpp index 7d9e74dec1..c1fd8f40eb 100644 --- a/src/xrpld/rpc/detail/ServerHandler.cpp +++ b/src/xrpld/rpc/detail/ServerHandler.cpp @@ -434,6 +434,8 @@ ServerHandler::processSession( session->close({boost::beast::websocket::policy_error, "threshold exceeded"}); // FIX: This rpcError is not delivered since the session // was just closed. + span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); + span.setError(rpc_span::val::error); return rpcError(RpcSlowDown); } @@ -465,6 +467,8 @@ ServerHandler::processSession( jr[jss::api_version] = jv[jss::api_version]; is->getConsumer().charge(Resource::kFeeMalformedRpc); + span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); + span.setError(rpc_span::val::error); return jr; } @@ -545,12 +549,19 @@ ServerHandler::processSession( } jr[jss::request] = rq; + // Mark the span according to the final result. Doing it here (rather + // than an unconditional setOk later) ensures error responses — from + // doCommand, a FORBID role, or the catch block above — are not + // overwritten as OK. + span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); + span.setError(rpc_span::val::error); } else { if (jr[jss::result].isMember("forwarded") && jr[jss::result]["forwarded"]) jr = jr[jss::result]; jr[jss::status] = jss::success; + span.setOk(); } if (jv.isMember(jss::id)) @@ -563,7 +574,6 @@ ServerHandler::processSession( jr[jss::api_version] = jv[jss::api_version]; jr[jss::type] = jss::response; - span.setOk(); return jr; } @@ -576,7 +586,7 @@ ServerHandler::processSession( auto span = SpanGuard::span(TraceCategory::Rpc, rpc_span::prefix::rpc, rpc_span::op::httpRequest); - processRequest( + bool const ok = processRequest( session->port(), buffersToString(session->request().body().data()), session->remoteAddress().atPort(0), @@ -598,7 +608,15 @@ ServerHandler::processSession( { session->close(true); } - span.setOk(); + // Reflect the request outcome on the wrapper span instead of always OK. + if (ok) + { + span.setOk(); + } + else + { + span.setError(rpc_span::val::error); + } } static json::Value @@ -617,7 +635,7 @@ constexpr json::Int kServerOverloaded = -32604; constexpr json::Int kForbidden = -32605; constexpr json::Int kWrongVersion = -32606; -void +bool ServerHandler::processRequest( Port const& port, std::string const& request, @@ -630,18 +648,29 @@ ServerHandler::processRequest( auto span = SpanGuard::span(TraceCategory::Rpc, rpc_span::prefix::rpc, rpc_span::op::process); auto rpcJ = app_.getJournal("RPC"); + // Tracks whether any failure occurred. Set on every error path (early + // returns, the catch block, and per-request error replies) and used at the + // end to mark the span status. The HTTP status code alone is insufficient: + // it stays 200 for batch responses and for ripplerpc < 3.0, so relying on + // it would let payload-level errors end the span as successful. + bool spanHadError = false; + + // Marks the span as failed before sending an error reply, so the + // early-return validation paths below are not later seen as successful + // (the span would otherwise end UNSET, invisible to {status.code=error}). + auto httpReplyError = [&](int status, std::string const& message) { + spanHadError = true; + httpReply(status, message, output, rpcJ); + }; + json::Value jsonOrig; { json::Reader reader; if ((request.size() > RPC::Tuning::kMaxRequestSize) || !reader.parse(request, jsonOrig) || !jsonOrig || !jsonOrig.isObject()) { - httpReply( - 400, - "Unable to parse request: " + reader.getFormattedErrorMessages(), - output, - rpcJ); - return; + httpReplyError(400, "Unable to parse request: " + reader.getFormattedErrorMessages()); + return false; } } @@ -652,8 +681,8 @@ ServerHandler::processRequest( batch = true; if (!jsonOrig.isMember(jss::params) || !jsonOrig[jss::params].isArray()) { - httpReply(400, "Malformed batch request", output, rpcJ); - return; + httpReplyError(400, "Malformed batch request"); + return false; } size = jsonOrig[jss::params].size(); } @@ -691,8 +720,8 @@ ServerHandler::processRequest( { if (!batch) { - httpReply(400, jss::invalid_API_version.cStr(), output, rpcJ); - return; + httpReplyError(400, jss::invalid_API_version.cStr()); + return false; } json::Value r(json::ValueType::Object); r[jss::request] = jsonRPC; @@ -734,8 +763,8 @@ ServerHandler::processRequest( { if (!batch) { - httpReply(503, "Server is overloaded", output, rpcJ); - return; + httpReplyError(503, "Server is overloaded"); + return false; } json::Value r = jsonRPC; r[jss::error] = makeJsonError(kServerOverloaded, "Server is overloaded"); @@ -749,8 +778,8 @@ ServerHandler::processRequest( usage.charge(Resource::kFeeMalformedRpc); if (!batch) { - httpReply(403, "Forbidden", output, rpcJ); - return; + httpReplyError(403, "Forbidden"); + return false; } json::Value r = jsonRPC; r[jss::error] = makeJsonError(kForbidden, "Forbidden"); @@ -763,8 +792,8 @@ ServerHandler::processRequest( usage.charge(Resource::kFeeMalformedRpc); if (!batch) { - httpReply(400, "Null method", output, rpcJ); - return; + httpReplyError(400, "Null method"); + return false; } json::Value r = jsonRPC; r[jss::error] = makeJsonError(kMethodNotFound, "Null method"); @@ -778,8 +807,8 @@ ServerHandler::processRequest( usage.charge(Resource::kFeeMalformedRpc); if (!batch) { - httpReply(400, "method is not string", output, rpcJ); - return; + httpReplyError(400, "method is not string"); + return false; } json::Value r = jsonRPC; r[jss::error] = makeJsonError(kMethodNotFound, "method is not string"); @@ -793,8 +822,8 @@ ServerHandler::processRequest( usage.charge(Resource::kFeeMalformedRpc); if (!batch) { - httpReply(400, "method is empty", output, rpcJ); - return; + httpReplyError(400, "method is empty"); + return false; } json::Value r = jsonRPC; r[jss::error] = makeJsonError(kMethodNotFound, "method is empty"); @@ -819,8 +848,8 @@ ServerHandler::processRequest( else if (!params.isArray() || params.size() != 1) { usage.charge(Resource::kFeeMalformedRpc); - httpReply(400, "params unparsable", output, rpcJ); - return; + httpReplyError(400, "params unparsable"); + return false; } else { @@ -828,8 +857,8 @@ ServerHandler::processRequest( if (!params.isObjectOrNull()) { usage.charge(Resource::kFeeMalformedRpc); - httpReply(400, "params unparsable", output, rpcJ); - return; + httpReplyError(400, "params unparsable"); + return false; } } } @@ -846,8 +875,8 @@ ServerHandler::processRequest( usage.charge(Resource::kFeeMalformedRpc); if (!batch) { - httpReply(400, "ripplerpc is not a string", output, rpcJ); - return; + httpReplyError(400, "ripplerpc is not a string"); + return false; } json::Value r = jsonRPC; @@ -906,6 +935,7 @@ ServerHandler::processRequest( << " when processing request: " << json::Compact{json::Value{params}}; span.recordException(ex); span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); + spanHadError = true; // LCOV_EXCL_STOP } @@ -922,6 +952,7 @@ ServerHandler::processRequest( { if (result.isMember(jss::error)) { + spanHadError = true; result[jss::status] = jss::error; result["code"] = result[jss::error_code]; result["message"] = result[jss::error_message]; @@ -942,6 +973,7 @@ ServerHandler::processRequest( // received. if (result.isMember(jss::error)) { + spanHadError = true; auto rq = params; if (rq.isObject()) @@ -1037,8 +1069,20 @@ ServerHandler::processRequest( } } - span.setOk(); + // Mark the span error if any request failed or the HTTP status is an error. + // spanHadError catches payload-level errors that httpStatus misses (batch + // responses and ripplerpc < 3.0 always return HTTP 200). + if (spanHadError || httpStatus >= 400) + { + span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); + span.setError(rpc_span::val::error); + } + else + { + span.setOk(); + } httpReply(httpStatus, response, output, rpcJ); + return !spanHadError; } //------------------------------------------------------------------------------ From 56f2e2c4cf6de814446bb56c9b45648ce05927b3 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 10 Jun 2026 15:57:22 +0100 Subject: [PATCH 5/6] add descriptive error message Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- src/xrpld/rpc/detail/ServerHandler.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/xrpld/rpc/detail/ServerHandler.cpp b/src/xrpld/rpc/detail/ServerHandler.cpp index c1fd8f40eb..282593a282 100644 --- a/src/xrpld/rpc/detail/ServerHandler.cpp +++ b/src/xrpld/rpc/detail/ServerHandler.cpp @@ -435,7 +435,7 @@ ServerHandler::processSession( // FIX: This rpcError is not delivered since the session // was just closed. span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); - span.setError(rpc_span::val::error); + span.setError("resource threshold exceeded"); return rpcError(RpcSlowDown); } @@ -468,7 +468,7 @@ ServerHandler::processSession( is->getConsumer().charge(Resource::kFeeMalformedRpc); span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); - span.setError(rpc_span::val::error); + span.setError(jr[jss::error].asString()); return jr; } @@ -554,7 +554,7 @@ ServerHandler::processSession( // doCommand, a FORBID role, or the catch block above — are not // overwritten as OK. span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error); - span.setError(rpc_span::val::error); + span.setError(jr[jss::error].asString()); } else { @@ -660,6 +660,7 @@ ServerHandler::processRequest( // (the span would otherwise end UNSET, invisible to {status.code=error}). auto httpReplyError = [&](int status, std::string const& message) { spanHadError = true; + span.setError(message); httpReply(status, message, output, rpcJ); }; From 4ed409f35ad1b04aef21dc8bca37ace7cc211b36 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 10 Jun 2026 16:05:04 +0100 Subject: [PATCH 6/6] addressed code review comments Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- docker/telemetry/grafana/provisioning/datasources/tempo.yaml | 2 +- src/xrpld/rpc/detail/RpcSpanNames.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml index 1af993961a..d6d2c05d2a 100644 --- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml +++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml @@ -95,7 +95,7 @@ datasources: tag: command operator: "=" scope: span - type: static + type: dynamic - id: rpc-status tag: rpc_status operator: "=" diff --git a/src/xrpld/rpc/detail/RpcSpanNames.h b/src/xrpld/rpc/detail/RpcSpanNames.h index e7bae84c2f..88fd299f29 100644 --- a/src/xrpld/rpc/detail/RpcSpanNames.h +++ b/src/xrpld/rpc/detail/RpcSpanNames.h @@ -159,7 +159,7 @@ using telemetry::attr_val::error; using telemetry::attr_val::success; inline constexpr auto admin = makeStr("admin"); inline constexpr auto user = makeStr("user"); -inline constexpr auto unknownCommand = makeStr("unknown_command"); +inline constexpr auto unknownCommand = makeStr("unknown"); /// "invalid_json" — WS message parse failure or oversize. inline constexpr auto invalidJson = makeStr("invalid_json"); } // namespace val