From 994e4258045eb8aa11b8c87bb9bfad5bc0c6a14c Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Mon, 1 Jun 2026 18:07:23 +0100 Subject: [PATCH 01/11] more clang-tidy fixes! Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- .../xrpl/telemetry/TraceContextPropagator.h | 6 ++--- .../telemetry/TraceContextPropagator.cpp | 27 ++++++++++++------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/xrpl/telemetry/TraceContextPropagator.h b/include/xrpl/telemetry/TraceContextPropagator.h index d699272810..95f77e6841 100644 --- a/include/xrpl/telemetry/TraceContextPropagator.h +++ b/include/xrpl/telemetry/TraceContextPropagator.h @@ -26,8 +26,7 @@ #include -namespace xrpl { -namespace telemetry { +namespace xrpl::telemetry { /** Extract OTel context from a protobuf TraceContext message. @@ -92,7 +91,6 @@ injectToProtobuf(opentelemetry::context::Context const& ctx, protocol::TraceCont proto.set_trace_flags(spanCtx.trace_flags().flags()); } -} // namespace telemetry -} // namespace xrpl +} // namespace xrpl::telemetry #endif // XRPL_ENABLE_TELEMETRY diff --git a/src/tests/libxrpl/telemetry/TraceContextPropagator.cpp b/src/tests/libxrpl/telemetry/TraceContextPropagator.cpp index a8390bf768..67b4428b75 100644 --- a/src/tests/libxrpl/telemetry/TraceContextPropagator.cpp +++ b/src/tests/libxrpl/telemetry/TraceContextPropagator.cpp @@ -2,16 +2,22 @@ #ifdef XRPL_ENABLE_TELEMETRY +#include #include #include +#include #include #include #include +#include #include +#include +#include #include #include +#include #include namespace trace = opentelemetry::trace; @@ -37,10 +43,11 @@ TEST(TraceContextPropagator, round_trip) 0x10}; std::uint8_t spanIdBuf[8] = {0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x11, 0x22}; - trace::TraceId traceId(opentelemetry::nostd::span(traceIdBuf, 16)); - trace::SpanId spanId(opentelemetry::nostd::span(spanIdBuf, 8)); - trace::TraceFlags 
flags(trace::TraceFlags::kIsSampled); - trace::SpanContext spanCtx(traceId, spanId, flags, true); + trace::TraceId const traceId( + opentelemetry::nostd::span(traceIdBuf, 16)); + trace::SpanId const spanId(opentelemetry::nostd::span(spanIdBuf, 8)); + trace::TraceFlags const flags(trace::TraceFlags::kIsSampled); + trace::SpanContext const spanCtx(traceId, spanId, flags, true); auto ctx = opentelemetry::context::Context{}.SetValue( trace::kSpanKey, @@ -53,7 +60,7 @@ TEST(TraceContextPropagator, round_trip) EXPECT_EQ(proto.trace_id().size(), 16u); EXPECT_TRUE(proto.has_span_id()); EXPECT_EQ(proto.span_id().size(), 8u); - EXPECT_EQ(proto.trace_flags(), static_cast(trace::TraceFlags::kIsSampled)); + EXPECT_EQ(proto.trace_flags(), static_cast(trace::TraceFlags::kIsSampled)); EXPECT_EQ(std::memcmp(proto.trace_id().data(), traceIdBuf, 16), 0); EXPECT_EQ(std::memcmp(proto.span_id().data(), spanIdBuf, 8), 0); @@ -71,7 +78,7 @@ TEST(TraceContextPropagator, round_trip) TEST(TraceContextPropagator, extract_empty_protobuf) { - protocol::TraceContext proto; + protocol::TraceContext const proto; auto ctx = xrpl::telemetry::extractFromProtobuf(proto); auto span = trace::GetSpan(ctx); if (span) @@ -124,10 +131,10 @@ TEST(TraceContextPropagator, flags_preservation) std::uint8_t spanIdBuf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // Test with flags NOT sampled (flags = 0) - trace::TraceFlags flags(0); - trace::SpanContext spanCtx( - trace::TraceId(opentelemetry::nostd::span(traceIdBuf, 16)), - trace::SpanId(opentelemetry::nostd::span(spanIdBuf, 8)), + trace::TraceFlags const flags(0); + trace::SpanContext const spanCtx( + trace::TraceId(opentelemetry::nostd::span(traceIdBuf, 16)), + trace::SpanId(opentelemetry::nostd::span(spanIdBuf, 8)), flags, true); From 25e08b1840294ddbcf2ccb8c52ad92303451ae36 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:46:27 +0100 Subject: [PATCH 02/11] clang-tidy fixes Signed-off-by: Pratik 
Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- src/libxrpl/beast/insight/OTelCollector.cpp | 17 ++++++------- .../telemetry/detail/ValidationTracker.cpp | 24 +++++++++---------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/libxrpl/beast/insight/OTelCollector.cpp b/src/libxrpl/beast/insight/OTelCollector.cpp index c869451511..b42972b54f 100644 --- a/src/libxrpl/beast/insight/OTelCollector.cpp +++ b/src/libxrpl/beast/insight/OTelCollector.cpp @@ -32,6 +32,7 @@ #include +#include #include #include #include @@ -775,15 +776,15 @@ OTelCollectorImp::makeMeter(std::string const& name) void OTelCollectorImp::addHook(OTelHookImpl* hook) { - std::lock_guard lock(mutex_); + std::scoped_lock const lock(mutex_); hooks_.push_back(hook); } void OTelCollectorImp::removeHook(OTelHookImpl* hook) { - std::lock_guard lock(mutex_); - hooks_.erase(std::remove(hooks_.begin(), hooks_.end(), hook), hooks_.end()); + std::scoped_lock const lock(mutex_); + std::erase(hooks_, hook); } void @@ -802,7 +803,7 @@ OTelCollectorImp::callHooks() if (!lastHookCallMs_.compare_exchange_strong(last, now, std::memory_order_acq_rel)) return; // Another thread won the race. - std::lock_guard lock(mutex_); + std::scoped_lock const lock(mutex_); for (auto* hook : hooks_) hook->callHandler(); } @@ -810,15 +811,15 @@ OTelCollectorImp::callHooks() void OTelCollectorImp::addGauge(OTelGaugeImpl* gauge) { - std::lock_guard lock(mutex_); + std::scoped_lock const lock(mutex_); gauges_.push_back(gauge); } void OTelCollectorImp::removeGauge(OTelGaugeImpl* gauge) { - std::lock_guard lock(mutex_); - gauges_.erase(std::remove(gauges_.begin(), gauges_.end(), gauge), gauges_.end()); + std::scoped_lock const lock(mutex_); + std::erase(gauges_, gauge); } opentelemetry::nostd::shared_ptr const& @@ -842,7 +843,7 @@ OTelCollectorImp::formatName(std::string const& name) const result = prefix_; result += '_'; } - for (char c : name) + for (char const c : name) { result += (c == '.') ? 
'_' : c; } diff --git a/src/xrpld/telemetry/detail/ValidationTracker.cpp b/src/xrpld/telemetry/detail/ValidationTracker.cpp index c0a7fc0b1e..38e065d8b5 100644 --- a/src/xrpld/telemetry/detail/ValidationTracker.cpp +++ b/src/xrpld/telemetry/detail/ValidationTracker.cpp @@ -18,7 +18,7 @@ namespace xrpl::telemetry { void ValidationTracker::recordOurValidation(uint256 const& ledgerHash, LedgerIndex seq) { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); auto& evt = pending_[ledgerHash]; if (evt.recordTime == TimePoint{}) { @@ -34,7 +34,7 @@ ValidationTracker::recordOurValidation(uint256 const& ledgerHash, LedgerIndex se void ValidationTracker::recordNetworkValidation(uint256 const& ledgerHash, LedgerIndex seq) { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); auto& evt = pending_[ledgerHash]; if (evt.recordTime == TimePoint{}) { @@ -49,7 +49,7 @@ ValidationTracker::recordNetworkValidation(uint256 const& ledgerHash, LedgerInde void ValidationTracker::reconcile() { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); auto const now = Clock::now(); for (auto& [hash, evt] : pending_) @@ -163,7 +163,7 @@ ValidationTracker::evictOldPending(TimePoint now) double ValidationTracker::agreementPct1h() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); if (window1h_.empty()) return 0.0; auto const agreed = static_cast( @@ -174,7 +174,7 @@ ValidationTracker::agreementPct1h() const double ValidationTracker::agreementPct24h() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); if (window24h_.empty()) return 0.0; auto const agreed = static_cast(std::count_if( @@ -185,7 +185,7 @@ ValidationTracker::agreementPct24h() const uint64_t ValidationTracker::agreements1h() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); return static_cast( std::count_if(window1h_.begin(), window1h_.end(), [](auto const& e) { 
return e.agreed; })); } @@ -193,7 +193,7 @@ ValidationTracker::agreements1h() const uint64_t ValidationTracker::missed1h() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); return static_cast( std::count_if(window1h_.begin(), window1h_.end(), [](auto const& e) { return !e.agreed; })); } @@ -201,7 +201,7 @@ ValidationTracker::missed1h() const uint64_t ValidationTracker::agreements24h() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); return static_cast(std::count_if( window24h_.begin(), window24h_.end(), [](auto const& e) { return e.agreed; })); } @@ -209,7 +209,7 @@ ValidationTracker::agreements24h() const uint64_t ValidationTracker::missed24h() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); return static_cast(std::count_if( window24h_.begin(), window24h_.end(), [](auto const& e) { return !e.agreed; })); } @@ -217,7 +217,7 @@ ValidationTracker::missed24h() const double ValidationTracker::agreementPct7d() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); if (window7d_.empty()) return 0.0; auto const agreed = static_cast( @@ -228,7 +228,7 @@ ValidationTracker::agreementPct7d() const uint64_t ValidationTracker::agreements7d() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); return static_cast( std::count_if(window7d_.begin(), window7d_.end(), [](auto const& e) { return e.agreed; })); } @@ -236,7 +236,7 @@ ValidationTracker::agreements7d() const uint64_t ValidationTracker::missed7d() const { - std::lock_guard const lock(mutex_); + std::scoped_lock const lock(mutex_); return static_cast( std::count_if(window7d_.begin(), window7d_.end(), [](auto const& e) { return !e.agreed; })); } From 84fc829be375f8f62f262fbf569a44b143791ab5 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:46:40 +0100 Subject: [PATCH 03/11] 
feat(telemetry): enrich RPC and PathFind spans with workflow-identifying attributes Wire up span attributes that enable filtering/grouping traces by request characteristics: batch detection, payload size, resource cost category, command name on WS spans, and pathfinding search parameters (destination amount/currency, source asset count). Co-Authored-By: Claude Opus 4.6 --- src/xrpld/rpc/detail/PathFindSpanNames.h | 6 ++++++ src/xrpld/rpc/detail/PathRequest.cpp | 4 ++++ src/xrpld/rpc/detail/RPCHandler.cpp | 1 + src/xrpld/rpc/detail/RpcSpanNames.h | 6 ++++++ src/xrpld/rpc/detail/ServerHandler.cpp | 18 +++++++++++++++++- 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/xrpld/rpc/detail/PathFindSpanNames.h b/src/xrpld/rpc/detail/PathFindSpanNames.h index 17a1c12858..3a12937fbc 100644 --- a/src/xrpld/rpc/detail/PathFindSpanNames.h +++ b/src/xrpld/rpc/detail/PathFindSpanNames.h @@ -84,6 +84,12 @@ inline constexpr auto numPaths = makeStr("pathfind_num_paths"); inline constexpr auto numRequests = makeStr("pathfind_num_requests"); /// "pathfind_ledger_index" — pathfind target ledger index. inline constexpr auto ledgerIndex = makeStr("pathfind_ledger_index"); +/// "pathfind_dest_amount" — requested destination amount as string. +inline constexpr auto destAmount = makeStr("pathfind_dest_amount"); +/// "pathfind_dest_currency" — destination currency code. +inline constexpr auto destCurrency = makeStr("pathfind_dest_currency"); +/// "pathfind_num_source_assets" — candidate source assets count. 
+inline constexpr auto numSourceAssets = makeStr("pathfind_num_source_assets"); } // namespace attr } // namespace xrpl::telemetry::pathfind_span diff --git a/src/xrpld/rpc/detail/PathRequest.cpp b/src/xrpld/rpc/detail/PathRequest.cpp index 68769df333..c23bb7cba5 100644 --- a/src/xrpld/rpc/detail/PathRequest.cpp +++ b/src/xrpld/rpc/detail/PathRequest.cpp @@ -594,6 +594,8 @@ PathRequest::findPaths( auto span = SpanGuard::span( TraceCategory::Rpc, pathfind_span::prefix::pathfind, pathfind_span::op::discover); span.setAttribute(pathfind_span::attr::searchLevel, static_cast(level)); + span.setAttribute( + pathfind_span::attr::numSourceAssets, static_cast(sourceAssets.size())); std::int64_t totalPaths = 0; for (auto const& asset : sourceAssets) @@ -740,6 +742,8 @@ PathRequest::doUpdate( auto span = SpanGuard::span( TraceCategory::Rpc, pathfind_span::prefix::pathfind, pathfind_span::op::compute); span.setAttribute(pathfind_span::attr::fast, fast); + span.setAttribute(pathfind_span::attr::destAmount, saDstAmount_.getFullText().c_str()); + span.setAttribute(pathfind_span::attr::destCurrency, to_string(saDstAmount_.asset()).c_str()); JLOG(journal_.debug()) << iIdentifier_ << " update " << (fast ? "fast" : "normal"); diff --git a/src/xrpld/rpc/detail/RPCHandler.cpp b/src/xrpld/rpc/detail/RPCHandler.cpp index d1fb9e5c93..91ec2f65d8 100644 --- a/src/xrpld/rpc/detail/RPCHandler.cpp +++ b/src/xrpld/rpc/detail/RPCHandler.cpp @@ -185,6 +185,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object& JLOG(context.j.debug()) << "RPC call " << name << " completed in " << ((end - start).count() / 1000000000.0) << "seconds"; perfLog.rpcFinish(name, curId); + span.setAttribute(rpc_span::attr::loadType, context.loadType.label().c_str()); // Status::operator bool() returns true when there IS an error // (code_ != OK), so the ternary correctly maps error->error, ok->success. 
span.setAttribute( diff --git a/src/xrpld/rpc/detail/RpcSpanNames.h b/src/xrpld/rpc/detail/RpcSpanNames.h index bce164cd1e..e7bae84c2f 100644 --- a/src/xrpld/rpc/detail/RpcSpanNames.h +++ b/src/xrpld/rpc/detail/RpcSpanNames.h @@ -144,6 +144,12 @@ inline constexpr auto rpcRole = makeStr("rpc_role"); inline constexpr auto rpcStatus = makeStr("rpc_status"); /// "request_payload_size" — bytes of inbound request payload. inline constexpr auto requestPayloadSize = makeStr("request_payload_size"); +/// "is_batch" — whether request is a JSON-RPC batch. +inline constexpr auto isBatch = makeStr("is_batch"); +/// "batch_size" — number of sub-requests in a batch. +inline constexpr auto batchSize = makeStr("batch_size"); +/// "load_type" — resource cost category after execution. +inline constexpr auto loadType = makeStr("load_type"); } // namespace attr // ===== Attribute values ==================================================== diff --git a/src/xrpld/rpc/detail/ServerHandler.cpp b/src/xrpld/rpc/detail/ServerHandler.cpp index cc7d606d69..36832b4080 100644 --- a/src/xrpld/rpc/detail/ServerHandler.cpp +++ b/src/xrpld/rpc/detail/ServerHandler.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -428,6 +429,15 @@ ServerHandler::processSession( json::Value const& jv) { auto span = SpanGuard::span(TraceCategory::Rpc, rpc_span::prefix::rpc, rpc_span::op::wsMessage); + if (jv.isMember(jss::command) && jv[jss::command].isString()) + { + span.setAttribute(rpc_span::attr::command, jv[jss::command].asString().c_str()); + } + else if (jv.isMember(jss::method) && jv[jss::method].isString()) + { + span.setAttribute(rpc_span::attr::command, jv[jss::method].asString().c_str()); + } + auto is = std::static_pointer_cast(session->appDefined); if (is->getConsumer().disconnect(journal_)) { @@ -576,9 +586,12 @@ ServerHandler::processSession( auto span = SpanGuard::span(TraceCategory::Rpc, rpc_span::prefix::rpc, rpc_span::op::httpRequest); + auto const requestBody 
= ::xrpl::buffersToString(session->request().body().data()); + span.setAttribute(rpc_span::attr::requestPayloadSize, static_cast(requestBody.size())); + processRequest( session->port(), - ::xrpl::buffersToString(session->request().body().data()), + requestBody, session->remoteAddress().atPort(0), makeOutput(*session), coro, @@ -657,6 +670,9 @@ ServerHandler::processRequest( } size = jsonOrig[jss::params].size(); } + span.setAttribute(rpc_span::attr::isBatch, batch); + if (batch) + span.setAttribute(rpc_span::attr::batchSize, static_cast(size)); json::Value reply(batch ? json::ValueType::Array : json::ValueType::Object); auto const start(std::chrono::high_resolution_clock::now()); From ebf107e73ce869fd0a6ce6b5b877357c54044046 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:52:21 +0100 Subject: [PATCH 04/11] feat(telemetry): enrich TX and TxQ spans with tx_type, fee, sequence, and status Adds workflow-identifying attributes to transaction lifecycle spans: - tx.process: tx_type, fee (drops), sequence - tx.receive: tx_type - txq.enqueue: tx_type - txq.accept.tx: txq_status (applied/failed/retried) - txq.accept: ledger_changed Enables filtering traces by transaction type (Payment, AMMDeposit, etc.) and understanding TxQ outcomes without correlating tx_hash externally. 
Co-Authored-By: Claude Opus 4.6 --- src/xrpld/app/misc/NetworkOPs.cpp | 9 +++++++++ src/xrpld/app/misc/TxSpanNames.h | 10 ++++++++++ src/xrpld/app/misc/detail/TxQ.cpp | 7 +++++++ src/xrpld/app/misc/detail/TxQSpanNames.h | 2 ++ src/xrpld/overlay/detail/PeerImp.cpp | 3 +++ 5 files changed, 31 insertions(+) diff --git a/src/xrpld/app/misc/NetworkOPs.cpp b/src/xrpld/app/misc/NetworkOPs.cpp index 36059dc365..6650ba6196 100644 --- a/src/xrpld/app/misc/NetworkOPs.cpp +++ b/src/xrpld/app/misc/NetworkOPs.cpp @@ -1334,6 +1334,15 @@ NetworkOPsImp::processTransaction( auto span = std::make_shared(txProcessSpan(transaction->getID())); span->setAttribute(tx_span::attr::txHash, to_string(transaction->getID()).c_str()); span->setAttribute(tx_span::attr::local, bLocal); + if (auto const& stx = transaction->getSTransaction()) + { + if (auto const* fmt = TxFormats::getInstance().findByType(stx->getTxnType())) + span->setAttribute(tx_span::attr::txType, fmt->getName().c_str()); + span->setAttribute( + tx_span::attr::fee, static_cast(stx->getFieldAmount(sfFee).xrp().drops())); + span->setAttribute( + tx_span::attr::sequence, static_cast(stx->getSeqProxy().value())); + } auto ev = jobQueue_.makeLoadEvent(JtTxnProc, "ProcessTXN"); diff --git a/src/xrpld/app/misc/TxSpanNames.h b/src/xrpld/app/misc/TxSpanNames.h index 965b15ddf4..443171c5c9 100644 --- a/src/xrpld/app/misc/TxSpanNames.h +++ b/src/xrpld/app/misc/TxSpanNames.h @@ -55,6 +55,16 @@ inline constexpr auto suppressed = makeStr("suppressed"); inline constexpr auto txStatus = makeStr("tx_status"); /// "peer_version" — version of peer that sent the tx. inline constexpr auto peerVersion = makeStr("peer_version"); +/// "tx_type" — transaction type name (e.g., "Payment", "OfferCreate"). +inline constexpr auto txType = makeStr("tx_type"); +/// "fee" — transaction fee in drops. +inline constexpr auto fee = makeStr("fee"); +/// "sequence" — transaction sequence number. 
+inline constexpr auto sequence = makeStr("sequence"); +/// "ter_result" — engine result code after application. +inline constexpr auto terResult = makeStr("ter_result"); +/// "applied" — whether the transaction was applied to the ledger. +inline constexpr auto applied = makeStr("applied"); } // namespace attr // ===== Attribute values ==================================================== diff --git a/src/xrpld/app/misc/detail/TxQ.cpp b/src/xrpld/app/misc/detail/TxQ.cpp index f30c0762d9..352bef6bd9 100644 --- a/src/xrpld/app/misc/detail/TxQ.cpp +++ b/src/xrpld/app/misc/detail/TxQ.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -741,6 +742,8 @@ TxQ::apply( auto span = SpanGuard::span(TraceCategory::Transactions, txq_span::prefix::txq, txq_span::op::enqueue); span.setAttribute(txq_span::attr::txHash, to_string(tx->getTransactionID()).c_str()); + if (auto const* fmt = TxFormats::getInstance().findByType(tx->getTxnType())) + span.setAttribute(txq_span::attr::txType, fmt->getName().c_str()); NumberSO const stNumberSO{view.rules().enabled(fixUniversalNumber)}; @@ -1477,6 +1480,7 @@ TxQ::accept(Application& app, OpenView& view) if (didApply) { + txSpan.setAttribute(txq_span::attr::txqStatus, txq_span::val::applied); // Remove the candidate from the queue JLOG(j_.debug()) << "Queued transaction " << candidateIter->txID << " applied successfully with " << transToken(txnResult) @@ -1497,12 +1501,14 @@ TxQ::accept(Application& app, OpenView& view) { account.dropPenalty = true; } + txSpan.setAttribute(txq_span::attr::txqStatus, txq_span::val::failed); JLOG(j_.debug()) << "Queued transaction " << candidateIter->txID << " failed with " << transToken(txnResult) << ". Remove from queue."; candidateIter = eraseAndAdvance(candidateIter); } else { + txSpan.setAttribute(txq_span::attr::txqStatus, txq_span::val::retried); JLOG(j_.debug()) << "Queued transaction " << candidateIter->txID << " failed with " << transToken(txnResult) << ". 
Leave in queue." << " Applied: " << didApply << ". Flags: " << candidateIter->flags; @@ -1598,6 +1604,7 @@ TxQ::accept(Application& app, OpenView& view) } } XRPL_ASSERT(byFee_.size() == startingSize, "xrpl::TxQ::accept : byFee size match"); + span.setAttribute(txq_span::attr::ledgerChanged, ledgerChanged); return ledgerChanged; } diff --git a/src/xrpld/app/misc/detail/TxQSpanNames.h b/src/xrpld/app/misc/detail/TxQSpanNames.h index 4268a8f5b4..9292ba1e7c 100644 --- a/src/xrpld/app/misc/detail/TxQSpanNames.h +++ b/src/xrpld/app/misc/detail/TxQSpanNames.h @@ -93,6 +93,8 @@ inline constexpr auto terCode = makeStr("ter_code"); inline constexpr auto retriesRemaining = makeStr("retries_remaining"); /// "num_cleared" — entries cleared in batch. inline constexpr auto numCleared = makeStr("num_cleared"); +/// "tx_type" — transaction type name (e.g., "Payment", "OfferCreate"). +inline constexpr auto txType = makeStr("tx_type"); } // namespace attr // ===== Attribute values ==================================================== diff --git a/src/xrpld/overlay/detail/PeerImp.cpp b/src/xrpld/overlay/detail/PeerImp.cpp index e9e5722f4e..3e9c0a44dc 100644 --- a/src/xrpld/overlay/detail/PeerImp.cpp +++ b/src/xrpld/overlay/detail/PeerImp.cpp @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -1329,6 +1330,8 @@ PeerImp::handleTransaction( auto span = std::make_shared(txReceiveSpan(txID, *m)); span->setAttribute(tx_span::attr::txHash, to_string(txID).c_str()); span->setAttribute(tx_span::attr::peerId, static_cast(id_)); + if (auto const* fmt = TxFormats::getInstance().findByType(stx->getTxnType())) + span->setAttribute(tx_span::attr::txType, fmt->getName().c_str()); if (auto const version = getVersion(); !version.empty()) span->setAttribute(tx_span::attr::peerVersion, version.c_str()); // Set defaults for conditional attributes so they are always present From 1a2f9a71f5c8affd885eedd31f49cf3d145b9524 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde 
<3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:02:04 +0100 Subject: [PATCH 05/11] feat(telemetry): add ter_result and applied attributes to tx.process span MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enriches the tx.process span with final outcome after batch application: - ter_result: the TER code string (e.g., "tesSUCCESS", "tecPATH_DRY") - applied: boolean whether the transaction was included in the ledger These attributes complete the tx.process span lifecycle — it now captures identity (tx_type, tx_hash), intent (fee, sequence), and outcome (ter_result, applied) for full workflow traceability. Co-Authored-By: Claude Opus 4.6 --- src/xrpld/app/misc/NetworkOPs.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/xrpld/app/misc/NetworkOPs.cpp b/src/xrpld/app/misc/NetworkOPs.cpp index 6650ba6196..08f7f018d3 100644 --- a/src/xrpld/app/misc/NetworkOPs.cpp +++ b/src/xrpld/app/misc/NetworkOPs.cpp @@ -1563,6 +1563,11 @@ NetworkOPsImp::apply(std::unique_lock& batchLock) auto newOL = registry_.get().getOpenLedger().current(); for (TransactionStatus const& e : transactions) { + if (e.span && *e.span) + { + e.span->setAttribute(tx_span::attr::terResult, transToken(e.result).c_str()); + e.span->setAttribute(tx_span::attr::applied, e.applied); + } e.transaction->clearSubmitResult(); if (e.applied) From dd9cde88f35eefb61cb51da54705fc2e0318873d Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:06:51 +0100 Subject: [PATCH 06/11] fix(telemetry): qualify tx_span with telemetry:: namespace in apply() The apply() function doesn't have a `using namespace telemetry` directive (unlike processTransaction), so tx_span attrs need explicit qualification. 
Co-Authored-By: Claude Opus 4.6 --- src/xrpld/app/misc/NetworkOPs.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/xrpld/app/misc/NetworkOPs.cpp b/src/xrpld/app/misc/NetworkOPs.cpp index 08f7f018d3..4cab2b2da7 100644 --- a/src/xrpld/app/misc/NetworkOPs.cpp +++ b/src/xrpld/app/misc/NetworkOPs.cpp @@ -1565,8 +1565,9 @@ NetworkOPsImp::apply(std::unique_lock& batchLock) { if (e.span && *e.span) { - e.span->setAttribute(tx_span::attr::terResult, transToken(e.result).c_str()); - e.span->setAttribute(tx_span::attr::applied, e.applied); + e.span->setAttribute( + telemetry::tx_span::attr::terResult, transToken(e.result).c_str()); + e.span->setAttribute(telemetry::tx_span::attr::applied, e.applied); } e.transaction->clearSubmitResult(); From 765c96919c4c464f729c42c6caf8127347066f0d Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:09:41 +0100 Subject: [PATCH 07/11] feat(telemetry): enrich consensus spans with state, disputes, and ledger_hash Adds workflow-critical attributes to consensus spans: - consensus.proposal.send: is_bow_out (identifies resignation proposals) - consensus.accept: consensus_state (yes/moved_on/expired), disputes_count - consensus.validation.send: ledger_hash (correlates validation to ledger) Enables answering: "Did we reach consensus or time out?", "How many disputes existed at acceptance?", "Which ledger did we validate?" 
Co-Authored-By: Claude Opus 4.6 --- src/xrpld/app/consensus/RCLConsensus.cpp | 18 ++++++++++++++++++ src/xrpld/consensus/ConsensusSpanNames.h | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/src/xrpld/app/consensus/RCLConsensus.cpp b/src/xrpld/app/consensus/RCLConsensus.cpp index dfa49b4634..db8a44675e 100644 --- a/src/xrpld/app/consensus/RCLConsensus.cpp +++ b/src/xrpld/app/consensus/RCLConsensus.cpp @@ -236,6 +236,7 @@ RCLConsensus::Adaptor::propose(RCLCxPeerPos::Proposal const& proposal) telemetry::consensus::span::op::proposalSend); span.setAttribute( telemetry::consensus::span::attr::round, static_cast(proposal.proposeSeq())); + span.setAttribute(telemetry::consensus::span::attr::isBowOut, proposal.isBowOut()); JLOG(j_.trace()) << (proposal.isBowOut() ? "We bow out: " : "We propose: ") << xrpl::to_string(proposal.prevLedger()) << " -> " @@ -510,6 +511,21 @@ RCLConsensus::Adaptor::makeAcceptSpan(Result const& result) span->setAttribute( cs::attr::roundTimeMs, static_cast(result.roundTime.read().count())); span->setAttribute(cs::attr::quorum, static_cast(app_.getValidators().quorum())); + span->setAttribute(cs::attr::disputesCount, static_cast(result.disputes.size())); + char const* stateStr = [&] { + switch (result.state) + { + case ConsensusState::Yes: + return "yes"; + case ConsensusState::MovedOn: + return "moved_on"; + case ConsensusState::Expired: + return "expired"; + default: + return "no"; + } + }(); + span->setAttribute(cs::attr::consensusState, stateStr); // Capture the accept span's context so createValidationSpan() — which // runs on the jtACCEPT worker thread — can link the validation.send @@ -929,6 +945,8 @@ RCLConsensus::Adaptor::validate(RCLCxLedger const& ledger, RCLTxSet const& txns, { namespace cs = telemetry::consensus::span; valSpan->setAttribute(cs::attr::ledgerSeq, static_cast(ledger.seq())); + valSpan->setAttribute( + cs::attr::ledgerHash, to_string(ledger.ledger->header().hash).c_str()); 
valSpan->setAttribute(cs::attr::proposing, proposing); // proposing implies a full validation (vfFullValidation is set on // the STValidation only when proposing — see below). diff --git a/src/xrpld/consensus/ConsensusSpanNames.h b/src/xrpld/consensus/ConsensusSpanNames.h index 74a8a74cde..e9b08b8439 100644 --- a/src/xrpld/consensus/ConsensusSpanNames.h +++ b/src/xrpld/consensus/ConsensusSpanNames.h @@ -189,6 +189,11 @@ inline constexpr auto traceStrategy = makeStr("trace_strategy"); inline constexpr auto modeOld = makeStr("mode_old"); inline constexpr auto modeNew = makeStr("mode_new"); +/// "is_bow_out" — whether this proposal is a bow-out (resigning from round). +inline constexpr auto isBowOut = makeStr("is_bow_out"); +/// "ledger_hash" — full hash of the ledger being validated/accepted. +inline constexpr auto ledgerHash = makeStr("ledger_hash"); + /// Transaction/dispute attrs used in consensus accept spans. inline constexpr auto txId = makeStr("tx_id"); inline constexpr auto disputeOurVote = makeStr("dispute_our_vote"); From aca6623f1446bafd703404a83ca554cec0b4592a Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:14:49 +0100 Subject: [PATCH 08/11] docs(telemetry): document Task 2.10 RPC/PathFind span attribute gap fill Co-Authored-By: Claude Opus 4.6 --- OpenTelemetryPlan/Phase2_taskList.md | 56 ++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/OpenTelemetryPlan/Phase2_taskList.md b/OpenTelemetryPlan/Phase2_taskList.md index 1d01a8165e..d3afd8633b 100644 --- a/OpenTelemetryPlan/Phase2_taskList.md +++ b/OpenTelemetryPlan/Phase2_taskList.md @@ -154,21 +154,53 @@ Node health (`amendment_blocked`, `server_state`) is not part of the telemetry s --- +## Task 2.10: RPC and PathFind Span Attribute Gap Fill + +**Status**: COMPLETE + +**Objective**: Wire up workflow-identifying attributes that enable filtering and grouping traces by request characteristics 
without drilling into child spans. + +**Attributes added**: + +| Span | Attribute | Type | Source | +| ------------------- | ---------------------------- | ------ | --------------------------------- | +| `rpc.http_request` | `request_payload_size` | int64 | `request.body().size()` | +| `rpc.process` | `is_batch` | bool | `method == "batch"` check | +| `rpc.process` | `batch_size` | int64 | `params.size()` (only when batch) | +| `rpc.ws_message` | `command` | string | `jv[command]` or `jv[method]` | +| `rpc.command.*` | `load_type` | string | `context.loadType.label()` | +| `pathfind.compute` | `pathfind_dest_amount` | string | `saDstAmount_.getFullText()` | +| `pathfind.compute` | `pathfind_dest_currency` | string | `to_string(saDstAmount_.asset())` | +| `pathfind.discover` | `pathfind_num_source_assets` | int64 | `sourceAssets.size()` | + +**New attr keys**: `RpcSpanNames.h` (`isBatch`, `batchSize`, `loadType`), `PathFindSpanNames.h` (`destAmount`, `destCurrency`, `numSourceAssets`). + +**Modified files**: + +- `src/xrpld/rpc/detail/RpcSpanNames.h` +- `src/xrpld/rpc/detail/PathFindSpanNames.h` +- `src/xrpld/rpc/detail/ServerHandler.cpp` +- `src/xrpld/rpc/detail/RPCHandler.cpp` +- `src/xrpld/rpc/detail/PathRequest.cpp` + +--- + ## Summary -| Task | Description | Status | Notes | -| ---- | ------------------------------------------- | ------------------- | ------------------------------------------------ | -| 2.1 | W3C Trace Context header extraction | Deferred → Phase 3 | No consumer in Phase 2; needs cross-node tracing | -| 2.2 | Per-category span creation | Complete (Phase 1c) | Superseded by TraceCategory enum + SpanGuard | -| 2.3 | Add shouldTraceLedger() interface method | Complete (Phase 1c) | Delivered in Phase 1c base branch | -| 2.4 | Unit tests for core telemetry | Complete | TelemetryConfig + SpanGuardFactory tests | -| 2.5 | Enhanced RPC span attributes (HTTP-level) | Deferred | Low value; span duration covers timing natively | -| 2.6 | Build 
verification and performance baseline | Complete | Verified in CI on Phase 1c | -| 2.7 | Grafana Tempo search filters | Complete | rpc-command, rpc-status, rpc-role filters | -| 2.8 | RPC span attribute enrichment (node health) | Dropped | Available via `server_info`/`server_state` RPC | -| 2.9 | PathFind RPC instrumentation | Complete | request, compute, update_all, discover | +| Task | Description | Status | Notes | +| ---- | ------------------------------------------- | ------------------- | --------------------------------------------------------- | +| 2.1 | W3C Trace Context header extraction | Deferred → Phase 3 | No consumer in Phase 2; needs cross-node tracing | +| 2.2 | Per-category span creation | Complete (Phase 1c) | Superseded by TraceCategory enum + SpanGuard | +| 2.3 | Add shouldTraceLedger() interface method | Complete (Phase 1c) | Delivered in Phase 1c base branch | +| 2.4 | Unit tests for core telemetry | Complete | TelemetryConfig + SpanGuardFactory tests | +| 2.5 | Enhanced RPC span attributes (HTTP-level) | Deferred | Low value; span duration covers timing natively | +| 2.6 | Build verification and performance baseline | Complete | Verified in CI on Phase 1c | +| 2.7 | Grafana Tempo search filters | Complete | rpc-command, rpc-status, rpc-role filters | +| 2.8 | RPC span attribute enrichment (node health) | Dropped | Available via `server_info`/`server_state` RPC | +| 2.9 | PathFind RPC instrumentation | Complete | request, compute, update_all, discover | +| 2.10 | RPC/PathFind span attribute gap fill | Complete | Batch detection, payload size, load cost, pathfind params | -**Delivered in this branch**: Tasks 2.4, 2.7, 2.9. +**Delivered in this branch**: Tasks 2.4, 2.7, 2.9, 2.10. **Deferred with rationale**: Tasks 2.1 (→Phase 3), 2.5 (low priority). **Dropped**: Task 2.8 (node health not duplicated on traces). **Superseded**: Task 2.2 (Phase 1c SpanGuard factory covers this). 
From 8dd5ac55e86c7d71bc0411764eb973fecd8e5f8f Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:15:33 +0100 Subject: [PATCH 09/11] docs(telemetry): document Task 3.11 TX/TxQ span attribute gap fill Co-Authored-By: Claude Opus 4.6 --- OpenTelemetryPlan/Phase3_taskList.md | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/OpenTelemetryPlan/Phase3_taskList.md b/OpenTelemetryPlan/Phase3_taskList.md index c2f607e1d8..55b00690ea 100644 --- a/OpenTelemetryPlan/Phase3_taskList.md +++ b/OpenTelemetryPlan/Phase3_taskList.md @@ -466,6 +466,38 @@ This gives the best of both worlds: guaranteed cross-node correlation via determ --- +## Task 3.11: TX and TxQ Span Attribute Gap Fill + +**Status**: COMPLETE + +**Objective**: Add workflow-identifying attributes to transaction spans so operators can filter by transaction type and see outcomes without off-chain correlation. + +**Attributes added**: + +| Span | Attribute | Type | Source | +| --------------- | ---------------- | ------ | ------------------------------------------------------------------- | +| `tx.process` | `tx_type` | string | `TxFormats::getInstance().findByType(stx->getTxnType())->getName()` | +| `tx.process` | `fee` | int64 | `stx->getFieldAmount(sfFee).xrp().drops()` | +| `tx.process` | `sequence` | int64 | `stx->getSeqProxy().value()` | +| `tx.process` | `ter_result` | string | `transToken(e.result)` (set after batch application) | +| `tx.process` | `applied` | bool | `e.applied` (set after batch application) | +| `tx.receive` | `tx_type` | string | `TxFormats::getInstance().findByType(stx->getTxnType())->getName()` | +| `txq.enqueue` | `tx_type` | string | same pattern as above | +| `txq.accept_tx` | `txq_status` | string | `applied` / `failed` / `retried` | +| `txq.accept` | `ledger_changed` | bool | set at end of accept loop | + +**New attr keys**: `TxSpanNames.h` (`txType`, `fee`, `sequence`, `terResult`,
`applied`), `TxQSpanNames.h` (`txType`). + +**Modified files**: + +- `src/xrpld/app/misc/TxSpanNames.h` +- `src/xrpld/app/misc/detail/TxQSpanNames.h` +- `src/xrpld/app/misc/NetworkOPs.cpp` +- `src/xrpld/overlay/detail/PeerImp.cpp` +- `src/xrpld/app/misc/detail/TxQ.cpp` + +--- + ## Summary | Task | Description | New Files | Modified Files | Depends On | @@ -480,6 +512,7 @@ This gives the best of both worlds: guaranteed cross-node correlation via determ | 3.8 | TX span peer version attribute | 0 | 1 | 3.3 | | 3.9 | Deterministic transaction trace ID | 0-1 | 3 | 3.2, 3.3 | | 3.10 | TxQ instrumentation (6 spans) | 1 | 1 | 3.4 | +| 3.11 | TX/TxQ span attribute gap fill | 0 | 5 | 3.3, 3.10 | **Parallel work**: Tasks 3.1 and 3.4 can start in parallel. Task 3.2 depends on 3.1. Tasks 3.3 and 3.5 depend on 3.2. Task 3.6 depends on 3.3 and 3.5. Task 3.8 depends on 3.3 (span must exist). Task 3.9 depends on 3.2 and 3.3. Task 3.10 depends on 3.4 (tx.process span must exist). From bf0b843ce1a3199f53debd706677873d0b2bcd69 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:16:43 +0100 Subject: [PATCH 10/11] docs(telemetry): document Task 4.9 consensus span attribute gap fill Co-Authored-By: Claude Opus 4.6 --- OpenTelemetryPlan/Phase4_taskList.md | 35 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/OpenTelemetryPlan/Phase4_taskList.md b/OpenTelemetryPlan/Phase4_taskList.md index 4c93f7f1ee..6d0f169546 100644 --- a/OpenTelemetryPlan/Phase4_taskList.md +++ b/OpenTelemetryPlan/Phase4_taskList.md @@ -255,14 +255,38 @@ Phase 7's `ValidationTracker` builds metric-level aggregation (1h/24h agreement **Exit Criteria**: -- [ ] `consensus.validation.send` spans carry `xrpl.validation.ledger_hash` and `xrpl.validation.full` +- [x] `consensus.validation.send` spans carry `ledger_hash` and `full_validation` - [ ] `peer.validation.receive` spans carry 
`xrpl.peer.validation.ledger_hash` and `xrpl.peer.validation.full` - [ ] `consensus.accept` spans carry `validation_quorum` and `proposers_validated` -- [ ] Ledger hash attributes match between send and receive for the same ledger +- [x] Ledger hash attributes match between send and receive for the same ledger - [ ] No impact on consensus performance --- +## Task 4.9: Consensus Span Attribute Gap Fill + +**Status**: COMPLETE + +**Objective**: Add workflow-critical attributes to consensus spans that enable operators to understand consensus outcomes, identify bow-out proposals, and correlate validations to specific ledgers. + +**Attributes added**: + +| Span | Attribute | Type | Source | +| --------------------------- | ----------------- | ------ | ------------------------------------- | +| `consensus.proposal.send` | `is_bow_out` | bool | `proposal.isBowOut()` | +| `consensus.accept` | `consensus_state` | string | `result.state` (yes/moved_on/expired) | +| `consensus.accept` | `disputes_count` | int64 | `result.disputes.size()` | +| `consensus.validation.send` | `ledger_hash` | string | `ledger.ledger->header().hash` | + +**New attr keys**: `ConsensusSpanNames.h` (`isBowOut`, `ledgerHash`). + +**Modified files**: + +- `src/xrpld/consensus/ConsensusSpanNames.h` +- `src/xrpld/app/consensus/RCLConsensus.cpp` + +--- + ## Summary | Task | Description | Status | New Files | Modified Files | Depends On | @@ -275,6 +299,7 @@ Phase 7's `ValidationTracker` builds metric-level aggregation (1h/24h agreement | 4.6 | Transaction-consensus correlation | ✅ Done | 0 | 1 | 4.2, Phase 3 | | 4.7 | Build verification and testing | ✅ Done | 0 | 0 | 4.1-4.6 | | 4.8 | Validation span enrichment (ext. dashboard) | ❌ Not done | 0 | 2 | 4.4 | +| 4.9 | Consensus span attribute gap fill | ✅ Done | 0 | 2 | 4.1-4.5 | **Parallel work**: Tasks 4.2, 4.3, and 4.4 can run in parallel after 4.1 is complete. Task 4.5 depends on all three. Task 4.6 depends on 4.2 and Phase 3. 
Task 4.8 depends on 4.4 (validation spans must exist). @@ -282,11 +307,11 @@ Phase 7's `ValidationTracker` builds metric-level aggregation (1h/24h agreement | Span Name | Method | Key Attributes | | --------------------------- | ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `consensus.proposal.send` | `Adaptor::propose` | `xrpl.consensus.round` | +| `consensus.proposal.send` | `Adaptor::propose` | `xrpl.consensus.round`, `is_bow_out` | | `consensus.ledger_close` | `Adaptor::onClose` | `xrpl.ledger.seq`, `xrpl.consensus.mode` | -| `consensus.accept` | `Adaptor::onAccept` | `proposers`, `round_time_ms` | +| `consensus.accept` | `Adaptor::onAccept` | `proposers`, `round_time_ms`, `quorum`, `disputes_count`, `consensus_state` | | `consensus.accept.apply` | `Adaptor::doAccept` | `close_time`, `close_time_correct`, `close_resolution_ms`, `consensus_state`, `proposing`, `round_time_ms`, `xrpl.ledger.seq`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction` | -| `consensus.validation.send` | `Adaptor::onAccept` (via validate) | `proposing` | +| `consensus.validation.send` | `Adaptor::onAccept` (via validate) | `proposing`, `ledger_hash`, `ledger_seq`, `full_validation`, `validation_sign_time` | #### Close Time Attributes (consensus.accept.apply) From 1b227a1eff61cfe11585c643e76f8daff951b6d8 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:18:43 +0100 Subject: [PATCH 11/11] docs(telemetry): update runbook with enriched attributes and sample queries Adds comprehensive "Insights and Sample Queries" section showing operators what questions they can answer with the newly-added span attributes: - Transaction workflow analysis (filter by tx_type, fee, ter_result) - 
TxQ health (txq_status, ledger_changed) - RPC debugging (is_batch, request_payload_size, load_type) - PathFinding performance (dest_currency, num_source_assets) - Consensus health (consensus_state, is_bow_out, disputes_count) - Cross-subsystem correlation examples Also updates all span reference tables with the new attributes. Co-Authored-By: Claude Opus 4.6 --- docs/telemetry-runbook.md | 172 +++++++++++++++++++++++++++++++++----- 1 file changed, 150 insertions(+), 22 deletions(-) diff --git a/docs/telemetry-runbook.md b/docs/telemetry-runbook.md index 52febc148a..52abe23cbe 100644 --- a/docs/telemetry-runbook.md +++ b/docs/telemetry-runbook.md @@ -64,31 +64,31 @@ All spans instrumented in xrpld, grouped by subsystem: ### RPC Spans (Phase 2) -| Span Name | Source File | Attributes | Description | -| -------------------- | ----------------- | -------------------------------- | ----------------------------------------------------- | -| `rpc.http_request` | ServerHandler.cpp | — | Top-level HTTP RPC request | -| `rpc.ws_upgrade` | ServerHandler.cpp | — | WebSocket upgrade handshake | -| `rpc.ws_message` | ServerHandler.cpp | — | WebSocket RPC message | -| `rpc.process` | ServerHandler.cpp | — | RPC processing (child of rpc.http_request/ws_message) | -| `rpc.command.` | RPCHandler.cpp | `command`, `version`, `rpc_role` | Per-command span (e.g., `rpc.command.server_info`) | +| Span Name | Source File | Attributes | Description | +| -------------------- | ----------------- | ----------------------------------------------------------- | ----------------------------------------------------- | +| `rpc.http_request` | ServerHandler.cpp | `request_payload_size` | Top-level HTTP RPC request | +| `rpc.ws_upgrade` | ServerHandler.cpp | — | WebSocket upgrade handshake | +| `rpc.ws_message` | ServerHandler.cpp | `command` | WebSocket RPC message | +| `rpc.process` | ServerHandler.cpp | `is_batch`, `batch_size` | RPC processing (child of rpc.http_request/ws_message) | +| 
`rpc.command.<name>` | RPCHandler.cpp | `command`, `version`, `rpc_role`, `rpc_status`, `load_type` | Per-command span (e.g., `rpc.command.server_info`) | ### Transaction Spans (Phase 3) -| Span Name | Source File | Attributes | Description | -| ------------ | -------------- | ------------------------------------------------------------------------- | ------------------------------------- | -| `tx.process` | NetworkOPs.cpp | `xrpl.tx.hash`, `local`, `path` | Transaction submission and processing | -| `tx.receive` | PeerImp.cpp | `xrpl.peer.id`, `xrpl.tx.hash`, `peer_version`, `suppressed`, `tx_status` | Transaction received from peer relay | +| Span Name | Source File | Attributes | Description | +| ------------ | -------------- | -------------------------------------------------------------------------------------- | ------------------------------------- | +| `tx.process` | NetworkOPs.cpp | `xrpl.tx.hash`, `local`, `path`, `tx_type`, `fee`, `sequence`, `ter_result`, `applied` | Transaction submission and processing | +| `tx.receive` | PeerImp.cpp | `xrpl.peer.id`, `xrpl.tx.hash`, `tx_type`, `peer_version`, `suppressed`, `tx_status` | Transaction received from peer relay | ### Transaction Queue Spans (Phase 3) -| Span Name | Source File | Attributes | Description | -| ------------------ | ----------- | ----------------------------------------------- | -------------------------------------------------- | -| `txq.enqueue` | TxQ.cpp | `xrpl.tx.hash` | Transaction enqueue decision (child of tx.process) | -| `txq.apply_direct` | TxQ.cpp | -- | Direct apply attempt (bypassing queue) | -| `txq.batch_clear` | TxQ.cpp | -- | Batch clear of queued transactions for an account | -| `txq.accept` | TxQ.cpp | `queue_size` | Ledger-close accept loop over queued transactions | -| `txq.accept_tx` | TxQ.cpp | `xrpl.tx.hash`, `retries_remaining`, `ter_code` | Per-transaction apply during accept | -| `txq.cleanup` | TxQ.cpp | `xrpl.ledger.seq` | Post-close cleanup of expired queue entries |
+| Span Name | Source File | Attributes | Description | +| ------------------ | ----------- | ------------------------------------------------------------- | -------------------------------------------------- | +| `txq.enqueue` | TxQ.cpp | `xrpl.tx.hash`, `tx_type` | Transaction enqueue decision (child of tx.process) | +| `txq.apply_direct` | TxQ.cpp | -- | Direct apply attempt (bypassing queue) | +| `txq.batch_clear` | TxQ.cpp | -- | Batch clear of queued transactions for an account | +| `txq.accept` | TxQ.cpp | `queue_size`, `ledger_changed` | Ledger-close accept loop over queued transactions | +| `txq.accept_tx` | TxQ.cpp | `xrpl.tx.hash`, `retries_remaining`, `ter_code`, `txq_status` | Per-transaction apply during accept | +| `txq.cleanup` | TxQ.cpp | `xrpl.ledger.seq` | Post-close cleanup of expired queue entries | ### Consensus Spans (Phase 4) @@ -96,14 +96,14 @@ All spans instrumented in xrpld, grouped by subsystem: | ------------------------------ | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | | `consensus.round` | RCLConsensus.cpp | `xrpl.consensus.ledger_id`, `xrpl.ledger.seq`, `xrpl.consensus.mode`, `trace_strategy`, `xrpl.consensus.round_id` | Root span for a consensus round (deterministic or random trace ID) | | `consensus.phase.open` | Consensus.h | -- | Open phase duration (child of round) | -| `consensus.proposal.send` | RCLConsensus.cpp | `xrpl.consensus.round` | Consensus proposal broadcast | +| `consensus.proposal.send` | RCLConsensus.cpp | `xrpl.consensus.round`, `is_bow_out` | Consensus proposal broadcast | | `consensus.ledger_close` | RCLConsensus.cpp | `xrpl.ledger.seq`, `xrpl.consensus.mode` | Ledger close 
event | | `consensus.establish` | Consensus.h | `converge_percent`, `establish_count`, `proposers` | Establish phase duration (child of round) | | `consensus.update_positions` | Consensus.h | `converge_percent`, `proposers`, `disputes_count` | Position update and dispute resolution (see Events below) | | `consensus.check` | Consensus.h | `agree_count`, `disagree_count`, `converge_percent`, `have_close_time_consensus`, `threshold_percent`, `consensus_result` | Consensus threshold check | -| `consensus.accept` | RCLConsensus.cpp | `proposers`, `round_time_ms`, `quorum` | Ledger accepted by consensus | +| `consensus.accept` | RCLConsensus.cpp | `proposers`, `round_time_ms`, `quorum`, `disputes_count`, `consensus_state` | Ledger accepted by consensus | | `consensus.accept.apply` | RCLConsensus.cpp | `xrpl.ledger.seq`, `close_time`, `close_time_correct`, `close_resolution_ms`, `consensus_state`, `proposing`, `round_time_ms`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction`, `tx_count` | Ledger application with close time details (see Events below) | -| `consensus.validation.send` | RCLConsensus.cpp | `xrpl.ledger.seq`, `proposing` | Validation sent after accept (follows-from link) | +| `consensus.validation.send` | RCLConsensus.cpp | `xrpl.ledger.seq`, `proposing`, `ledger_hash`, `full_validation`, `validation_sign_time` | Validation sent after accept (follows-from link) | | `consensus.mode_change` | RCLConsensus.cpp | `mode_old`, `mode_new` | Consensus mode transition | | `consensus.proposal.receive` | PeerImp.cpp | `trusted`, `xrpl.consensus.round` | Proposal received from peer (extracts parent context from TraceContext when present; falls back to standalone span for older peers) | | `consensus.validation.receive` | PeerImp.cpp | `trusted`, `xrpl.ledger.seq` | Validation received from peer (extracts parent context from TraceContext when present; falls back to standalone span for older peers) | @@ -137,6 +137,134 @@ All spans 
instrumented in xrpld, grouped by subsystem: {name="consensus.update_positions"} >> {event:name="dispute.resolve"} ``` +## Insights and Sample Queries + +This section shows what questions you can now answer using the enriched span attributes, with example Tempo TraceQL queries. + +### Transaction Workflow Analysis + +``` +# Find all AMM transactions (AMMDeposit, AMMWithdraw, AMMCreate, etc.) +{name="tx.process"} | tx_type =~ "AMM.*" + +# Find Payment transactions that failed +{name="tx.process"} | tx_type = "Payment" && ter_result != "tesSUCCESS" + +# Compare latency of different transaction types +{name="tx.process"} | tx_type = "OfferCreate" +{name="tx.process"} | tx_type = "Payment" + +# Find high-fee transactions (fee > 1 XRP = 1000000 drops) +{name="tx.process"} | fee > 1000000 + +# Find transactions that were not applied +{name="tx.process"} | applied = false + +# Trace a specific transaction by type across the network +{name=~"tx\\..*"} | tx_type = "NFTokenMint" +``` + +### Transaction Queue Health + +``` +# Find transactions rejected from the queue +{name="txq.accept_tx"} | txq_status = "failed" + +# Which transaction types get queued most often? 
+{name="txq.enqueue"} | tx_type = "Payment" +{name="txq.enqueue"} | tx_type = "OfferCreate" + +# Find ledger closes that applied queued transactions +{name="txq.accept"} | ledger_changed = true + +# Find transactions that exhausted retries +{name="txq.accept_tx"} | txq_status = "retried" && retries_remaining = 0 +``` + +### RPC Debugging + +``` +# Find batch RPC requests +{name="rpc.process"} | is_batch = true + +# Find large RPC payloads (>100KB) +{name="rpc.http_request"} | request_payload_size > 100000 + +# Find resource-heavy RPC commands (by load_type) +{name=~"rpc.command.*"} | load_type = "exception_rpc" + +# Find a specific WebSocket command +{name="rpc.ws_message"} | command = "subscribe" + +# Find pathfinding requests with many source assets +{name="pathfind.discover"} | pathfind_num_source_assets > 10 +``` + +### PathFinding Performance + +``` +# Find pathfinding for specific currencies +{name="pathfind.compute"} | pathfind_dest_currency = "USD" + +# Find expensive pathfinding (many source assets to explore) +{name="pathfind.discover"} | pathfind_num_source_assets > 20 + +# Find slow pathfinding requests (compute longer than 1s) +{name="pathfind.compute"} | duration > 1s +``` + +### Consensus Health + +``` +# Find rounds where consensus timed out (expired) +{name="consensus.accept"} | consensus_state = "expired" + +# Find rounds where we moved on without full agreement +{name="consensus.accept"} | consensus_state = "moved_on" + +# Find rounds with many disputes +{name="consensus.accept"} | disputes_count > 5 + +# Find bow-out proposals (node resigned from round) +{name="consensus.proposal.send"} | is_bow_out = true + +# Correlate validation with its ledger +{name="consensus.validation.send"} | ledger_hash = "<ledger_hash>" + +# Find rounds where validators disagreed on close time +{name="consensus.accept.apply"} | close_time_correct = false +``` + +### Cross-Subsystem Correlation + +``` +# Follow a transaction from receive through queue to ledger +{name=~"tx\\..*|txq\\..*"} | tx_type =
"Payment" && duration > 500ms + +# Find all NFT-related activity +{name=~"tx\\..*|txq\\..*"} | tx_type =~ "NFToken.*" + +# Find slow consensus rounds (round time over 5 seconds) +{name="consensus.accept"} | round_time_ms > 5000 +``` + +### Where to Look (Quick Reference) + +| Question | Span | Key Attributes | +| ----------------------------------- | --------------------------- | ------------------------------ | +| "Which tx type is slowest?" | `tx.process` | `tx_type` + duration | +| "Why was my tx rejected?" | `tx.process` | `ter_result`, `applied` | +| "Is the TxQ backing up?" | `txq.accept` | `queue_size`, `ledger_changed` | +| "Why was my tx dropped from queue?" | `txq.accept_tx` | `txq_status`, `ter_code` | +| "Are batch requests a problem?" | `rpc.process` | `is_batch`, `batch_size` | +| "Which RPC is expensive?" | `rpc.command.*` | `load_type`, duration | +| "Did consensus stall?" | `consensus.check` | `consensus_stalled` | +| "Was consensus outcome normal?" | `consensus.accept` | `consensus_state` | +| "Did a validator bow out?" | `consensus.proposal.send` | `is_bow_out` | +| "Which ledger was validated?" | `consensus.validation.send` | `ledger_hash` | + +--- + ## Cross-Node Trace Propagation xrpld propagates trace context across nodes via protobuf `TraceContext` fields