feat(telemetry): complete Phase 4 consensus tracing

Implement remaining Phase 4/4a consensus tracing tasks:

- Add consensus.phase.open span (open → closeLedger lifecycle)
- Add consensus.proposal.receive span in PeerImp with trusted attr
- Add consensus.validation.receive span in PeerImp with trusted/seq attrs
- Add tx_count attr on accept.apply, disputes_count on update_positions
- Add tx.included events with txId in doAccept transaction loop
- Enhance dispute.resolve event with yays/nays fields
- Add avalanche_threshold attr on update_positions span
- Reparent accept/accept.apply as children of round span via childSpan()

Also add compile-time constants in ConsensusSpanNames.h and update
the span hierarchy diagram.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-04-28 16:16:53 +01:00
parent 90c2321bb8
commit bc49eb6f83
7 changed files with 78 additions and 13 deletions

View File

@@ -5,7 +5,10 @@ Loop: test.jtx test.unit_test
test.unit_test ~= test.jtx
Loop: xrpl.telemetry xrpld.rpc
xrpld.rpc ~= xrpl.telemetry
xrpld.rpc > xrpl.telemetry
Loop: xrpld.app xrpld.consensus
xrpld.app > xrpld.consensus
Loop: xrpld.app xrpld.overlay
xrpld.app > xrpld.overlay
@@ -19,6 +22,9 @@ Loop: xrpld.app xrpld.rpc
Loop: xrpld.app xrpld.shamap
xrpld.shamap > xrpld.app
Loop: xrpld.app xrpld.telemetry
xrpld.telemetry == xrpld.app
Loop: xrpld.overlay xrpld.rpc
xrpld.rpc ~= xrpld.overlay

View File

@@ -101,7 +101,6 @@ test.core > xrpl.server
test.csf > xrpl.basics
test.csf > xrpld.consensus
test.csf > xrpl.json
test.csf > xrpl.telemetry
test.csf > xrpl.ledger
test.csf > xrpl.protocol
test.json > test.jtx
@@ -196,7 +195,6 @@ tests.libxrpl > xrpl.net
tests.libxrpl > xrpl.protocol
tests.libxrpl > xrpl.protocol_autogen
tests.libxrpl > xrpl.telemetry
tests.libxrpl > xrpld.telemetry
xrpl.conditions > xrpl.basics
xrpl.conditions > xrpl.protocol
xrpl.core > xrpl.basics
@@ -238,9 +236,7 @@ xrpl.tx > xrpl.protocol
xrpld.app > test.unit_test
xrpld.app > xrpl.basics
xrpld.app > xrpl.core
xrpld.app > xrpld.consensus
xrpld.app > xrpld.core
xrpld.app > xrpld.telemetry
xrpld.app > xrpl.json
xrpld.app > xrpl.ledger
xrpld.app > xrpl.net
@@ -257,7 +253,6 @@ xrpld.consensus > xrpl.json
xrpld.consensus > xrpl.ledger
xrpld.consensus > xrpl.protocol
xrpld.consensus > xrpl.telemetry
xrpld.consensus > xrpld.telemetry
xrpld.core > xrpl.basics
xrpld.core > xrpl.core
xrpld.core > xrpl.net
@@ -275,6 +270,7 @@ xrpld.overlay > xrpl.protocol
xrpld.overlay > xrpl.resource
xrpld.overlay > xrpl.server
xrpld.overlay > xrpl.shamap
xrpld.overlay > xrpl.telemetry
xrpld.overlay > xrpl.tx
xrpld.peerfinder > xrpl.basics
xrpld.peerfinder > xrpld.core
@@ -302,3 +298,5 @@ xrpld.shamap > xrpl.basics
xrpld.shamap > xrpld.core
xrpld.shamap > xrpl.protocol
xrpld.shamap > xrpl.shamap
xrpld.telemetry > xrpl.basics
xrpld.telemetry > xrpl.telemetry

View File

@@ -9,6 +9,7 @@
*
* consensus.round (deterministic trace_id from ledger hash)
* |
* +-- consensus.phase.open
* +-- consensus.proposal.send
* +-- consensus.ledger_close
* +-- consensus.establish
@@ -18,6 +19,9 @@
* +-- consensus.accept.apply (jtACCEPT thread)
* +-- consensus.validation.send (jtACCEPT thread, linked)
* +-- consensus.mode_change
*
* consensus.proposal.receive (standalone, PeerImp)
* consensus.validation.receive (standalone, PeerImp)
*/
#include <xrpl/telemetry/SpanNames.h>
@@ -39,6 +43,9 @@ inline constexpr auto accept = makeStr("accept");
/// "accept.apply" — operation segment for the span covering doAccept().
inline constexpr auto acceptApply = makeStr("accept.apply");
/// "validation.send" — operation segment for the outgoing-validation span.
inline constexpr auto validationSend = makeStr("validation.send");
/// "mode_change" — operation segment for the consensus mode-change span.
inline constexpr auto modeChange = makeStr("mode_change");
/// "proposal.receive" — operation segment for the span emitted in PeerImp
/// when a peer proposal (TMProposeSet) is received.
inline constexpr auto proposalReceive = makeStr("proposal.receive");
/// "validation.receive" — operation segment for the span emitted in PeerImp
/// when a peer validation (TMValidation) is received.
inline constexpr auto validationReceive = makeStr("validation.receive");
/// "phase.open" — operation segment for the open-phase span, which is
/// created in startRoundInternal() and reset in closeLedger().
inline constexpr auto phaseOpen = makeStr("phase.open");
} // namespace op
// ===== Full span names (prefix.op) ===========================================
@@ -53,6 +60,9 @@ inline constexpr auto accept = join(seg::consensus, op::accept);
/// "consensus.accept.apply"
inline constexpr auto acceptApply = join(seg::consensus, op::acceptApply);
/// "consensus.validation.send"
inline constexpr auto validationSend = join(seg::consensus, op::validationSend);
/// "consensus.mode_change"
inline constexpr auto modeChange = join(seg::consensus, op::modeChange);
/// "consensus.proposal.receive" (standalone span, emitted from PeerImp)
inline constexpr auto proposalReceive = join(seg::consensus, op::proposalReceive);
/// "consensus.validation.receive" (standalone span, emitted from PeerImp)
inline constexpr auto validationReceive = join(seg::consensus, op::validationReceive);
/// "consensus.phase.open"
inline constexpr auto phaseOpen = join(seg::consensus, op::phaseOpen);
// ===== Attribute keys ========================================================
@@ -145,6 +155,13 @@ inline constexpr auto disputeOurVote =
inline constexpr auto disputeYays = join(join(seg::xrpl, makeStr("dispute")), makeStr("yays"));
/// "xrpl.dispute.nays"
inline constexpr auto disputeNays = join(join(seg::xrpl, makeStr("dispute")), makeStr("nays"));
/// "xrpl.consensus.tx_count"
inline constexpr auto txCount = join(xrplConsensus, makeStr("tx_count"));
/// "xrpl.consensus.disputes_count"
inline constexpr auto disputesCount = join(xrplConsensus, makeStr("disputes_count"));
/// "xrpl.consensus.trusted"
inline constexpr auto trusted = join(xrplConsensus, makeStr("trusted"));
} // namespace attr
// ===== Attribute values ======================================================

View File

@@ -1,6 +1,6 @@
#include <xrpld/app/consensus/ConsensusSpanNames.h>
#include <xrpld/app/consensus/RCLConsensus.h>
#include <xrpld/app/consensus/ConsensusSpanNames.h>
#include <xrpld/app/consensus/RCLCensorshipDetector.h>
#include <xrpld/app/consensus/RCLCxLedger.h>
#include <xrpld/app/consensus/RCLCxPeerPos.h>
@@ -449,8 +449,8 @@ RCLConsensus::Adaptor::onAccept(
bool const validating)
{
{
auto span = telemetry::SpanGuard::span(
telemetry::TraceCategory::Consensus, telemetry::seg::consensus, "accept");
auto span =
telemetry::SpanGuard::childSpan(telemetry::cons_span::accept, roundSpanContext_);
span.setAttribute(
telemetry::cons_span::attr::proposers, static_cast<int64_t>(result.proposers));
span.setAttribute(
@@ -511,8 +511,8 @@ RCLConsensus::Adaptor::doAccept(
closeTimeCorrect = true;
}
auto doAcceptSpan = telemetry::SpanGuard::span(
telemetry::TraceCategory::Consensus, telemetry::seg::consensus, "accept.apply");
auto doAcceptSpan =
telemetry::SpanGuard::childSpan(telemetry::cons_span::acceptApply, roundSpanContext_);
doAcceptSpan.setAttribute(
telemetry::cons_span::attr::ledgerSeq, static_cast<int64_t>(prevLedger.seq() + 1));
doAcceptSpan.setAttribute(
@@ -563,12 +563,16 @@ RCLConsensus::Adaptor::doAccept(
JLOG(j_.debug()) << "Building canonical tx set: " << retriableTxs.key();
int64_t txCount = 0;
for (auto const& item : *result.txns.map_)
{
try
{
retriableTxs.insert(std::make_shared<STTx const>(SerialIter{item.slice()}));
JLOG(j_.debug()) << " Tx: " << item.key();
++txCount;
auto const txHash = to_string(item.key());
doAcceptSpan.addEvent("tx.included", {{telemetry::cons_span::attr::txId, txHash}});
}
catch (std::exception const& ex)
{
@@ -576,6 +580,7 @@ RCLConsensus::Adaptor::doAccept(
JLOG(j_.warn()) << " Tx: " << item.key() << " throws: " << ex.what();
}
}
doAcceptSpan.setAttribute(telemetry::cons_span::attr::txCount, txCount);
auto built = buildLCL(
prevLedger,

View File

@@ -1,8 +1,8 @@
#include <xrpld/app/misc/TxQ.h>
#include <xrpld/app/misc/detail/TxQSpanNames.h>
#include <xrpld/app/ledger/OpenLedger.h>
#include <xrpld/app/main/Application.h>
#include <xrpld/app/misc/detail/TxQSpanNames.h>
#include <xrpl/basics/BasicConfig.h>
#include <xrpl/basics/Log.h>

View File

@@ -609,6 +609,11 @@ private:
*/
std::optional<xrpl::telemetry::SpanGuard> establishSpan_;
/** Span for the open phase of consensus.
* Created in startRoundInternal(); cleared (ended) in closeLedger().
*/
std::optional<xrpl::telemetry::SpanGuard> openSpan_;
/** Create the establish-phase span if not yet active.
* Called on each phaseEstablish() invocation; no-op while span is live.
*/
@@ -695,6 +700,11 @@ Consensus<Adaptor>::startRoundInternal(
CLOG(clog) << "startRoundInternal transitioned to ConsensusPhase::open, "
"previous ledgerID: "
<< prevLedgerID << ", seq: " << prevLedger.seq() << ". ";
openSpan_.emplace(
telemetry::SpanGuard::span(
telemetry::TraceCategory::Consensus,
telemetry::seg::consensus,
telemetry::cons_span::op::phaseOpen));
mode_.set(mode, adaptor_);
now_ = now;
prevLedgerID_ = prevLedgerID;
@@ -1420,6 +1430,7 @@ Consensus<Adaptor>::closeLedger(std::unique_ptr<std::stringstream> const& clog)
// We should not be closing if we already have a position
XRPL_ASSERT(!result_, "xrpl::Consensus::closeLedger : result is not set");
openSpan_.reset();
phase_ = ConsensusPhase::establish;
JLOG(j_.debug()) << "transitioned to ConsensusPhase::establish";
rawCloseTimes_.self = now_;
@@ -1480,6 +1491,8 @@ Consensus<Adaptor>::updateOurPositions(std::unique_ptr<std::stringstream> const&
auto span = SpanGuard::span(TraceCategory::Consensus, seg::consensus, "update_positions");
span.setAttribute(cons_span::attr::convergePercent, static_cast<int64_t>(convergePercent_));
span.setAttribute(cons_span::attr::proposers, static_cast<int64_t>(currPeerPositions_.size()));
span.setAttribute(
cons_span::attr::disputesCount, static_cast<int64_t>(result_->disputes.size()));
ConsensusParms const& parms = adaptor_.parms();
// Compute a cutoff time
@@ -1540,10 +1553,14 @@ Consensus<Adaptor>::updateOurPositions(std::unique_ptr<std::stringstream> const&
mutableSet->erase(txId);
}
auto const yaysStr = std::to_string(dispute.getYays());
auto const naysStr = std::to_string(dispute.getNays());
span.addEvent(
"dispute.resolve",
{{cons_span::attr::txId, to_string(txId)},
{cons_span::attr::disputeOurVote, dispute.getOurVote() ? "yes" : "no"}});
{cons_span::attr::disputeOurVote, dispute.getOurVote() ? "yes" : "no"},
{cons_span::attr::disputeYays, yaysStr},
{cons_span::attr::disputeNays, naysStr}});
}
}
@@ -1568,6 +1585,7 @@ Consensus<Adaptor>::updateOurPositions(std::unique_ptr<std::stringstream> const&
if (newState)
closeTimeAvalancheState_ = *newState;
CLOG(clog) << "neededWeight " << neededWeight << ". ";
span.setAttribute(cons_span::attr::avalancheThreshold, static_cast<int64_t>(neededWeight));
int participants = currPeerPositions_.size();
if (mode_.get() == ConsensusMode::proposing)

View File

@@ -1,5 +1,6 @@
#include <xrpld/overlay/detail/PeerImp.h>
#include <xrpld/app/consensus/ConsensusSpanNames.h>
#include <xrpld/app/consensus/RCLCxPeerPos.h>
#include <xrpld/app/consensus/RCLValidations.h>
#include <xrpld/app/ledger/InboundLedgers.h>
@@ -1943,6 +1944,13 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
}
}
{
// Emit a consensus-category "proposal.receive" span for this incoming
// proposal, tagged with whether the proposer is trusted.
// NOTE(review): `span` goes out of scope at the closing brace below, before
// the proposal is constructed or dispatched. Presumably SpanGuard ends the
// span on destruction, in which case this records receipt only and does not
// time the handling of the proposal — confirm that is the intent.
using namespace telemetry;
auto span = SpanGuard::span(
TraceCategory::Consensus, seg::consensus, cons_span::op::proposalReceive);
span.setAttribute(cons_span::attr::trusted, isTrusted);
}
JLOG(p_journal_.trace()) << "Proposal: " << (isTrusted ? "trusted" : "untrusted");
auto proposal = RCLCxPeerPos(
@@ -2534,6 +2542,19 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
return;
}
{
// Emit a consensus-category "validation.receive" span for this incoming
// validation, tagged with whether the validator is trusted.
// NOTE(review): `span` goes out of scope at the closing brace below, before
// the validation is checked or relayed. Presumably SpanGuard ends the span
// on destruction, in which case this records receipt only and does not time
// the handling of the validation — confirm that is the intent.
using namespace telemetry;
auto span = SpanGuard::span(
TraceCategory::Consensus, seg::consensus, cons_span::op::validationReceive);
span.setAttribute(cons_span::attr::trusted, isTrusted);
// The ledger sequence is only attached when the validation carries
// sfLedgerSequence; otherwise the attribute is omitted.
if (val->isFieldPresent(sfLedgerSequence))
{
span.setAttribute(
cons_span::attr::ledgerSeq,
static_cast<int64_t>(val->getFieldU32(sfLedgerSequence)));
}
}
if (!isTrusted && (tracking_.load() == Tracking::diverged))
{
JLOG(p_journal_.debug()) << "Dropping untrusted validation from diverged peer";