Merge branch 'pratik/otel-phase1c-rpc-integration' into pratik/otel-phase2-rpc-tracing

This commit is contained in:
Pratik Mankawde
2026-05-13 15:55:39 +01:00
7 changed files with 128 additions and 38 deletions

View File

@@ -185,15 +185,15 @@ Traced RPC operations produce a span hierarchy like:
```
rpc.request
└── rpc.command.server_info (xrpl.rpc.command=server_info, xrpl.rpc.status=success)
└── rpc.command.server_info (command=server_info, rpc_status=success)
```
Each span includes attributes:
- `xrpl.rpc.command` — the RPC method name
- `xrpl.rpc.version` — API version
- `xrpl.rpc.role` — `admin` or `user`
- `xrpl.rpc.status` — `success` or `error`
- `command` — the RPC method name
- `version` — API version
- `rpc_role` — `admin` or `user`
- `rpc_status` — `success` or `error`
## Running Tests

View File

@@ -50,7 +50,7 @@
auto span = SpanGuard::span(
TraceCategory::Rpc, rpc_span::prefix::command, "submit");
span.setAttribute(rpc_span::attr::command, "submit");
span.setAttribute(rpc_span::attr::status, rpc_span::val::success);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
// span ended automatically on scope exit
@endcode
@@ -83,7 +83,7 @@
TraceCategory::Rpc, rpc_span::prefix::rpc, "request");
if (span) {
// expensive attribute computation only when active
span.setAttribute(rpc_span::attr::payloadSize, computeSize());
span.setAttribute(rpc_span::attr::requestPayloadSize, computeSize());
}
@endcode

View File

@@ -16,9 +16,12 @@
* concatenation support. boost::static_string is not constexpr.
* StaticStr<N> exists specifically for compile-time dot-join composition.
*
* Naming conventions follow OpenTelemetry semantic conventions:
* - Attribute keys: "xrpl.<subsystem>.<field>"
* - Span prefixes: "<subsystem>[.<component>]"
* Naming conventions (see spec 2026-05-13-span-attr-naming-design):
* - Per-span attribute keys: bare field name (span name carries the domain).
* - Collision qualifier: <domain>_<field> when bare name collides across
* domains or with OTel reserved `status` (e.g. rpc_status, grpc_status).
* - Resource attribute keys: xrpl.<subsystem>.<field> (process-identity).
* - Span prefixes: <subsystem>[.<component>].
*/
#include <cstddef>
@@ -98,7 +101,7 @@ inline constexpr auto link = makeStr("link");
namespace attr {
inline constexpr auto networkId = join(join(seg::xrpl, seg::network), makeStr("id"));
inline constexpr auto networkType = join(join(seg::xrpl, seg::network), makeStr("type"));
inline constexpr auto linkType = join(join(seg::xrpl, seg::link), makeStr("type"));
inline constexpr auto linkType = makeStr("link_type");
/// Node health attributes (cross-cutting, used by RPC/consensus/tx spans).
inline constexpr auto xrplNode = join(seg::xrpl, makeStr("node"));

View File

@@ -11,7 +11,7 @@
* +-------------------------------------------------------+
* | grpc.request |
* | CallData::process(coro) |
* | attrs: method, role, status |
* | attrs: method, grpc_role, grpc_status |
* +-------------------------------------------------------+
*
* Unlike the HTTP/WS RPC path, gRPC has a flat single-span structure
@@ -38,14 +38,12 @@ inline constexpr auto request = makeStr("request");
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplGrpc = join(seg::xrpl, makeStr("grpc"));
/// "xrpl.grpc.method"
inline constexpr auto method = join(xrplGrpc, makeStr("method"));
/// "xrpl.grpc.role"
inline constexpr auto role = join(xrplGrpc, makeStr("role"));
/// "xrpl.grpc.status"
inline constexpr auto status = join(xrplGrpc, makeStr("status"));
/// "method" — gRPC method name (e.g. GetLedger).
inline constexpr auto method = makeStr("method");
/// "grpc_role" — Domain-qualified: a bare "role" would collide with the RPC span's role attribute (rpc_role).
inline constexpr auto grpcRole = makeStr("grpc_role");
/// "grpc_status" — Domain-qualified: avoids OTel reserved span status.
inline constexpr auto grpcStatus = makeStr("grpc_status");
} // namespace attr
// ===== Attribute values ====================================================

View File

@@ -168,7 +168,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
span.setAttribute(rpc_span::attr::command, name.c_str());
span.setAttribute(rpc_span::attr::version, static_cast<int64_t>(context.apiVersion));
span.setAttribute(
rpc_span::attr::role,
rpc_span::attr::rpcRole,
context.role == Role::ADMIN ? std::string_view(rpc_span::val::admin)
: std::string_view(rpc_span::val::user));
span.setAttribute(attr::nodeAmendmentBlocked, context.app.getOPs().isAmendmentBlocked());
@@ -189,7 +189,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
JLOG(context.j.debug()) << "RPC call " << name << " completed in "
<< ((end - start).count() / 1000000000.0) << "seconds";
perfLog.rpcFinish(name, curId);
span.setAttribute(rpc_span::attr::status, rpc_span::val::success);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
return ret;
}
catch (std::exception& e)
@@ -197,7 +197,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
perfLog.rpcError(name, curId);
JLOG(context.j.info()) << "Caught throw: " << e.what();
span.recordException(e);
span.setAttribute(rpc_span::attr::status, rpc_span::val::error);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error);
if (context.loadType == Resource::feeReferenceRPC)
context.loadType = Resource::feeExceptionRPC;

View File

@@ -14,8 +14,99 @@
* auto span = SpanGuard::span(
* TraceCategory::Rpc, rpc_span::prefix::command, "submit");
* span.setAttribute(rpc_span::attr::command, "submit");
* span.setAttribute(rpc_span::attr::status, rpc_span::val::success);
* span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
* @endcode
*
* Span hierarchy (automatic nesting via OTel thread-local context):
*
* HTTP JSON-RPC path (single request):
*
* +-------------------------------------------------------+
* | rpc.http_request |
* | ServerHandler::processSession(Session) |
* | |
* | +--------------------------------------------------+ |
* | | rpc.process | |
* | | ServerHandler::processRequest() | |
* | | | |
* | | +---------------------------------------------+ | |
* | | | rpc.command.{name} | | |
* | | | RPC::callMethod() | | |
* | | | attrs: command, version, rpc_role, rpc_status | | |
* | | +---------------------------------------------+ | |
* | +--------------------------------------------------+ |
* +-------------------------------------------------------+
*
* HTTP batch path (multiple commands per request):
*
* +-------------------------------------------------------+
* | rpc.http_request |
* | |
* | +--------------------------------------------------+ |
* | | rpc.process | |
* | | | |
* | | +------------------+ +------------------+ | |
* | | | rpc.command.{a} | | rpc.command.{b} | ... | |
* | | +------------------+ +------------------+ | |
* | +--------------------------------------------------+ |
* +-------------------------------------------------------+
*
* WebSocket path:
*
* +-------------------------------------------------------+
* | rpc.ws_message |
* | ServerHandler::processSession(WSSession) |
* | |
* | +--------------------------------------------------+ |
* | | rpc.command.{name} | |
* | | RPC::callMethod() | |
* | | attrs: command, version, rpc_role, rpc_status | |
* | +--------------------------------------------------+ |
* +-------------------------------------------------------+
*
* WebSocket error paths:
*
* +-------------------------------------------------------+
* | rpc.ws_message (error: invalid_json) |
* | ServerHandler::onWSMessage() — parse failure |
* +-------------------------------------------------------+
*
* +-------------------------------------------------------+
* | rpc.ws_upgrade |
* | ServerHandler::onHandoff() — upgrade try/catch |
* +-------------------------------------------------------+
*
* Command dispatch error path:
*
* +-------------------------------------------------------+
* | rpc.command.{name} (error: too_busy/unknown/etc) |
* | RPC::doCommand() — fillHandler() rejection |
* +-------------------------------------------------------+
*
* gRPC path (see GrpcSpanNames.h for constants):
*
* +-------------------------------------------------------+
* | grpc.request |
* | CallData::process(coro) |
* | attrs: method, grpc_role, grpc_status |
* +-------------------------------------------------------+
*
* Covered paths:
* - HTTP JSON-RPC (single and batch requests)
* - WebSocket RPC commands
* - WebSocket message parse errors (invalid JSON, oversized)
* - WebSocket upgrade failures (protocol handshake errors)
* - Admin CLI (connects via HTTP internally)
* - Command dispatch rejections (unknown cmd, too busy, no perm)
* - gRPC endpoints (GetLedger, GetLedgerData, GetLedgerDiff,
* GetLedgerEntry)
* - Command execution: timing, success/failure, exceptions
* - Per-command attributes: name, API version, rpc_role, rpc_status
*
* Known gaps (not yet instrumented):
* - Early validation errors in processRequest() before rpc.process
* span (malformed JSON, auth failures, oversized requests)
* - Subscription push notifications (server-initiated, not RPC)
*/
#include <xrpl/telemetry/SpanNames.h>
@@ -43,18 +134,16 @@ inline constexpr auto process = makeStr("process");
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplRpc = join(seg::xrpl, seg::rpc);
/// "xrpl.rpc.command"
inline constexpr auto command = join(xrplRpc, makeStr("command"));
/// "xrpl.rpc.version"
inline constexpr auto version = join(xrplRpc, makeStr("version"));
/// "xrpl.rpc.role"
inline constexpr auto role = join(xrplRpc, makeStr("role"));
/// "xrpl.rpc.status"
inline constexpr auto status = join(xrplRpc, makeStr("status"));
/// "xrpl.rpc.payload_size"
inline constexpr auto payloadSize = join(xrplRpc, makeStr("payload_size"));
/// "command" — RPC method name.
inline constexpr auto command = makeStr("command");
/// "version" — api_version per request.
inline constexpr auto version = makeStr("version");
/// "rpc_role" — admin|user. Domain-qualified: a bare "role" would collide with the gRPC span's role attribute (grpc_role).
inline constexpr auto rpcRole = makeStr("rpc_role");
/// "rpc_status" — success|error. Domain-qualified: avoids OTel reserved span status.
inline constexpr auto rpcStatus = makeStr("rpc_status");
/// "request_payload_size" — bytes of inbound request payload.
inline constexpr auto requestPayloadSize = makeStr("request_payload_size");
} // namespace attr
// ===== Attribute values ====================================================

View File

@@ -513,7 +513,7 @@ ServerHandler::processSession(
JLOG(m_journal.error()) << "Exception while processing WS: " << ex.what() << "\n"
<< "Input JSON: " << Json::Compact{Json::Value{jv}};
span.recordException(ex);
span.setAttribute(rpc_span::attr::status, rpc_span::val::error);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error);
// LCOV_EXCL_STOP
}
@@ -904,7 +904,7 @@ ServerHandler::processRequest(
<< "Internal error : " << ex.what()
<< " when processing request: " << Json::Compact{Json::Value{params}};
span.recordException(ex);
span.setAttribute(rpc_span::attr::status, rpc_span::val::error);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error);
// LCOV_EXCL_STOP
}