From bc49eb6f832c771db9191138c8bb43a93aff336f Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Tue, 28 Apr 2026 16:16:53 +0100 Subject: [PATCH] feat(telemetry): complete Phase 4 consensus tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement remaining Phase 4/4a consensus tracing tasks: - Add consensus.phase.open span (open → closeLedger lifecycle) - Add consensus.proposal.receive span in PeerImp with trusted attr - Add consensus.validation.receive span in PeerImp with trusted/seq attrs - Add tx_count attr on accept.apply, disputes_count on update_positions - Add tx.included events with txId in doAccept transaction loop - Enhance dispute.resolve event with yays/nays fields - Add avalanche_threshold attr on update_positions span - Reparent accept/accept.apply as children of round span via childSpan() Also adds compile-time constants in ConsensusSpanNames.h and updates the span hierarchy diagram. Co-Authored-By: Claude Opus 4.6 --- .../scripts/levelization/results/loops.txt | 8 ++++++- .../scripts/levelization/results/ordering.txt | 8 +++---- src/xrpld/app/consensus/ConsensusSpanNames.h | 17 +++++++++++++++ src/xrpld/app/consensus/RCLConsensus.cpp | 15 ++++++++----- src/xrpld/app/misc/detail/TxQ.cpp | 2 +- src/xrpld/consensus/Consensus.h | 20 +++++++++++++++++- src/xrpld/overlay/detail/PeerImp.cpp | 21 +++++++++++++++++++ 7 files changed, 78 insertions(+), 13 deletions(-) diff --git a/.github/scripts/levelization/results/loops.txt b/.github/scripts/levelization/results/loops.txt index 181cbec44a..463a35e822 100644 --- a/.github/scripts/levelization/results/loops.txt +++ b/.github/scripts/levelization/results/loops.txt @@ -5,7 +5,10 @@ Loop: test.jtx test.unit_test test.unit_test ~= test.jtx Loop: xrpl.telemetry xrpld.rpc - xrpld.rpc ~= xrpl.telemetry + xrpld.rpc > xrpl.telemetry + +Loop: xrpld.app xrpld.consensus + xrpld.app > xrpld.consensus Loop: xrpld.app xrpld.overlay xrpld.app > xrpld.overlay @@ -19,6 +22,9 @@ Loop: xrpld.app xrpld.rpc Loop: xrpld.app xrpld.shamap xrpld.shamap > xrpld.app +Loop: xrpld.app xrpld.telemetry + xrpld.telemetry == xrpld.app + Loop: xrpld.overlay xrpld.rpc xrpld.rpc ~= xrpld.overlay diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt index 62b51b4a4f..1d8ed01560 100644 --- a/.github/scripts/levelization/results/ordering.txt +++ b/.github/scripts/levelization/results/ordering.txt @@ -101,7 +101,6 @@ test.core > xrpl.server test.csf > xrpl.basics test.csf > xrpld.consensus test.csf > xrpl.json -test.csf > xrpl.telemetry test.csf > xrpl.ledger test.csf > xrpl.protocol test.json > test.jtx @@ -196,7 +195,6 @@ tests.libxrpl > xrpl.net tests.libxrpl > xrpl.protocol tests.libxrpl > xrpl.protocol_autogen tests.libxrpl > xrpl.telemetry -tests.libxrpl > xrpld.telemetry xrpl.conditions > xrpl.basics xrpl.conditions > xrpl.protocol xrpl.core > xrpl.basics @@ -238,9 +236,7 @@ xrpl.tx > xrpl.protocol xrpld.app > test.unit_test xrpld.app > xrpl.basics xrpld.app > xrpl.core -xrpld.app > xrpld.consensus xrpld.app > xrpld.core -xrpld.app > xrpld.telemetry xrpld.app > xrpl.json xrpld.app > xrpl.ledger xrpld.app > xrpl.net @@ -257,7 +253,6 @@ xrpld.consensus > xrpl.json xrpld.consensus > xrpl.ledger xrpld.consensus > xrpl.protocol xrpld.consensus > xrpl.telemetry -xrpld.consensus > xrpld.telemetry xrpld.core > xrpl.basics xrpld.core > xrpl.core xrpld.core > xrpl.net @@ -275,6 +270,7 @@ xrpld.overlay > xrpl.protocol xrpld.overlay > xrpl.resource xrpld.overlay > xrpl.server xrpld.overlay > xrpl.shamap +xrpld.overlay > xrpl.telemetry xrpld.overlay > xrpl.tx xrpld.peerfinder > xrpl.basics xrpld.peerfinder > xrpld.core @@ -302,3 +298,5 @@ xrpld.shamap > xrpl.basics xrpld.shamap > xrpld.core xrpld.shamap > xrpl.protocol xrpld.shamap > xrpl.shamap +xrpld.telemetry > xrpl.basics +xrpld.telemetry > xrpl.telemetry diff --git a/src/xrpld/app/consensus/ConsensusSpanNames.h b/src/xrpld/app/consensus/ConsensusSpanNames.h index 77c2ad6bb5..a10ccf3b9e 100644 --- a/src/xrpld/app/consensus/ConsensusSpanNames.h +++ b/src/xrpld/app/consensus/ConsensusSpanNames.h @@ -9,6 +9,7 @@ * * consensus.round (deterministic trace_id from ledger hash) * | + * +-- consensus.phase.open * +-- consensus.proposal.send * +-- consensus.ledger_close * +-- consensus.establish @@ -18,6 +19,9 @@ * +-- consensus.accept.apply (jtACCEPT thread) * +-- consensus.validation.send (jtACCEPT thread, linked) * +-- consensus.mode_change + * + * consensus.proposal.receive (standalone, PeerImp) + * consensus.validation.receive (standalone, PeerImp) */ #include @@ -39,6 +43,9 @@ inline constexpr auto accept = makeStr("accept"); inline constexpr auto acceptApply = makeStr("accept.apply"); inline constexpr auto validationSend = makeStr("validation.send"); inline constexpr auto modeChange = makeStr("mode_change"); +inline constexpr auto proposalReceive = makeStr("proposal.receive"); +inline constexpr auto validationReceive = makeStr("validation.receive"); +inline constexpr auto phaseOpen = makeStr("phase.open"); } // namespace op // ===== Full span names (prefix.op) =========================================== @@ -53,6 +60,9 @@ inline constexpr auto accept = join(seg::consensus, op::accept); inline constexpr auto acceptApply = join(seg::consensus, op::acceptApply); inline constexpr auto validationSend = join(seg::consensus, op::validationSend); inline constexpr auto modeChange = join(seg::consensus, op::modeChange); +inline constexpr auto proposalReceive = join(seg::consensus, op::proposalReceive); +inline constexpr auto validationReceive = join(seg::consensus, op::validationReceive); +inline constexpr auto phaseOpen = join(seg::consensus, op::phaseOpen); // ===== Attribute keys ======================================================== @@ -145,6 +155,13 @@ inline constexpr auto disputeOurVote = inline constexpr auto disputeYays = join(join(seg::xrpl, makeStr("dispute")), makeStr("yays")); /// "xrpl.dispute.nays" inline constexpr auto disputeNays = join(join(seg::xrpl, makeStr("dispute")), makeStr("nays")); + +/// "xrpl.consensus.tx_count" +inline constexpr auto txCount = join(xrplConsensus, makeStr("tx_count")); +/// "xrpl.consensus.disputes_count" +inline constexpr auto disputesCount = join(xrplConsensus, makeStr("disputes_count")); +/// "xrpl.consensus.trusted" +inline constexpr auto trusted = join(xrplConsensus, makeStr("trusted")); } // namespace attr // ===== Attribute values ====================================================== diff --git a/src/xrpld/app/consensus/RCLConsensus.cpp b/src/xrpld/app/consensus/RCLConsensus.cpp index 8be7f7c1e1..6a342334a0 100644 --- a/src/xrpld/app/consensus/RCLConsensus.cpp +++ b/src/xrpld/app/consensus/RCLConsensus.cpp @@ -1,6 +1,6 @@ -#include #include +#include #include #include #include @@ -449,8 +449,8 @@ RCLConsensus::Adaptor::onAccept( bool const validating) { { - auto span = telemetry::SpanGuard::span( - telemetry::TraceCategory::Consensus, telemetry::seg::consensus, "accept"); + auto span = + telemetry::SpanGuard::childSpan(telemetry::cons_span::accept, roundSpanContext_); span.setAttribute( telemetry::cons_span::attr::proposers, static_cast(result.proposers)); span.setAttribute( @@ -511,8 +511,8 @@ RCLConsensus::Adaptor::doAccept( closeTimeCorrect = true; } - auto doAcceptSpan = telemetry::SpanGuard::span( - telemetry::TraceCategory::Consensus, telemetry::seg::consensus, "accept.apply"); + auto doAcceptSpan = + telemetry::SpanGuard::childSpan(telemetry::cons_span::acceptApply, roundSpanContext_); doAcceptSpan.setAttribute( telemetry::cons_span::attr::ledgerSeq, static_cast(prevLedger.seq() + 1)); doAcceptSpan.setAttribute( @@ -563,12 +563,16 @@ RCLConsensus::Adaptor::doAccept( JLOG(j_.debug()) << "Building canonical tx set: " << retriableTxs.key(); + int64_t txCount = 0; for (auto const& item : *result.txns.map_) { try { retriableTxs.insert(std::make_shared(SerialIter{item.slice()})); JLOG(j_.debug()) << " Tx: " << item.key(); + ++txCount; + auto const txHash = to_string(item.key()); + doAcceptSpan.addEvent("tx.included", {{telemetry::cons_span::attr::txId, txHash}}); } catch (std::exception const& ex) { @@ -576,6 +580,7 @@ RCLConsensus::Adaptor::doAccept( JLOG(j_.warn()) << " Tx: " << item.key() << " throws: " << ex.what(); } } + doAcceptSpan.setAttribute(telemetry::cons_span::attr::txCount, txCount); auto built = buildLCL( prevLedger, diff --git a/src/xrpld/app/misc/detail/TxQ.cpp b/src/xrpld/app/misc/detail/TxQ.cpp index 51a5e1e386..32842ab9ad 100644 --- a/src/xrpld/app/misc/detail/TxQ.cpp +++ b/src/xrpld/app/misc/detail/TxQ.cpp @@ -1,8 +1,8 @@ #include -#include #include #include +#include #include #include diff --git a/src/xrpld/consensus/Consensus.h b/src/xrpld/consensus/Consensus.h index 446c6be0a0..5bc8725fb4 100644 --- a/src/xrpld/consensus/Consensus.h +++ b/src/xrpld/consensus/Consensus.h @@ -609,6 +609,11 @@ private: */ std::optional establishSpan_; + /** Span for the open phase of consensus. + * Created in startRoundInternal(); cleared (ended) in closeLedger(). + */ + std::optional openSpan_; + /** Create the establish-phase span if not yet active. * Called on each phaseEstablish() invocation; no-op while span is live. */ @@ -695,6 +700,11 @@ Consensus::startRoundInternal( CLOG(clog) << "startRoundInternal transitioned to ConsensusPhase::open, " "previous ledgerID: " << prevLedgerID << ", seq: " << prevLedger.seq() << ". "; + openSpan_.emplace( + telemetry::SpanGuard::span( + telemetry::TraceCategory::Consensus, + telemetry::seg::consensus, + telemetry::cons_span::op::phaseOpen)); mode_.set(mode, adaptor_); now_ = now; prevLedgerID_ = prevLedgerID; @@ -1420,6 +1430,7 @@ Consensus::closeLedger(std::unique_ptr const& clog) // We should not be closing if we already have a position XRPL_ASSERT(!result_, "xrpl::Consensus::closeLedger : result is not set"); + openSpan_.reset(); phase_ = ConsensusPhase::establish; JLOG(j_.debug()) << "transitioned to ConsensusPhase::establish"; rawCloseTimes_.self = now_; @@ -1480,6 +1491,8 @@ Consensus::updateOurPositions(std::unique_ptr const& auto span = SpanGuard::span(TraceCategory::Consensus, seg::consensus, "update_positions"); span.setAttribute(cons_span::attr::convergePercent, static_cast(convergePercent_)); span.setAttribute(cons_span::attr::proposers, static_cast(currPeerPositions_.size())); + span.setAttribute( + cons_span::attr::disputesCount, static_cast(result_->disputes.size())); ConsensusParms const& parms = adaptor_.parms(); // Compute a cutoff time @@ -1540,10 +1553,14 @@ Consensus::updateOurPositions(std::unique_ptr const& mutableSet->erase(txId); } + auto const yaysStr = std::to_string(dispute.getYays()); + auto const naysStr = std::to_string(dispute.getNays()); span.addEvent( "dispute.resolve", {{cons_span::attr::txId, to_string(txId)}, - {cons_span::attr::disputeOurVote, dispute.getOurVote() ? "yes" : "no"}}); + {cons_span::attr::disputeOurVote, dispute.getOurVote() ? "yes" : "no"}, + {cons_span::attr::disputeYays, yaysStr}, + {cons_span::attr::disputeNays, naysStr}}); } } @@ -1568,6 +1585,7 @@ Consensus::updateOurPositions(std::unique_ptr const& if (newState) closeTimeAvalancheState_ = *newState; CLOG(clog) << "neededWeight " << neededWeight << ". "; + span.setAttribute(cons_span::attr::avalancheThreshold, static_cast(neededWeight)); int participants = currPeerPositions_.size(); if (mode_.get() == ConsensusMode::proposing) diff --git a/src/xrpld/overlay/detail/PeerImp.cpp b/src/xrpld/overlay/detail/PeerImp.cpp index 16f8484243..151285dc3c 100644 --- a/src/xrpld/overlay/detail/PeerImp.cpp +++ b/src/xrpld/overlay/detail/PeerImp.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -1943,6 +1944,13 @@ PeerImp::onMessage(std::shared_ptr const& m) } } + { + using namespace telemetry; + auto span = SpanGuard::span( + TraceCategory::Consensus, seg::consensus, cons_span::op::proposalReceive); + span.setAttribute(cons_span::attr::trusted, isTrusted); + } + JLOG(p_journal_.trace()) << "Proposal: " << (isTrusted ? "trusted" : "untrusted"); auto proposal = RCLCxPeerPos( @@ -2534,6 +2542,19 @@ PeerImp::onMessage(std::shared_ptr const& m) return; } + { + using namespace telemetry; + auto span = SpanGuard::span( + TraceCategory::Consensus, seg::consensus, cons_span::op::validationReceive); + span.setAttribute(cons_span::attr::trusted, isTrusted); + if (val->isFieldPresent(sfLedgerSequence)) + { + span.setAttribute( + cons_span::attr::ledgerSeq, + static_cast(val->getFieldU32(sfLedgerSequence))); + } + } + if (!isTrusted && (tracking_.load() == Tracking::diverged)) { JLOG(p_journal_.debug()) << "Dropping untrusted validation from diverged peer";