feat(telemetry): wire ValidationTracker to MetricsRegistry and consensus hooks

Add ValidationTracker member to MetricsRegistry with a public accessor,
register a rippled_validation_agreement observable gauge that calls
reconcile() and reports 1h/24h agreement percentages and counts, and
hook recordOurValidation/recordNetworkValidation into RCLConsensus
validate() and LedgerMaster setValidLedger() respectively.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-03-31 15:29:17 +01:00
parent 29e1e851a3
commit 5d8d7ad6dc
4 changed files with 84 additions and 4 deletions

View File

@@ -982,7 +982,12 @@ RCLConsensus::Adaptor::validate(RCLCxLedger const& ledger, RCLTxSet const& txns,
// Record validation sent for OTel dashboard parity counter.
if (auto* mr = app_.getMetricsRegistry())
{
mr->incrementValidationsSent();
// Record our validation for the agreement tracker so it can
// compare against network-validated ledgers.
mr->getValidationTracker().recordOurValidation(ledger.id(), ledger.seq());
}
}
void

View File

@@ -12,6 +12,7 @@
#include <xrpld/overlay/Overlay.h>
#include <xrpld/overlay/Peer.h>
#include <xrpld/rpc/detail/PathRequestManager.h>
#include <xrpld/telemetry/MetricsRegistry.h>
#include <xrpld/telemetry/TracingInstrumentation.h>
#include <xrpl/basics/MathUtilities.h>
@@ -251,6 +252,11 @@ LedgerMaster::setValidLedger(std::shared_ptr<Ledger const> const& l)
(void)max_ledger_difference_;
mValidLedgerSeq = l->header().seq;
// Record the network-validated ledger for the agreement tracker so it
// can compare against our own validations.
if (auto* mr = app_.getMetricsRegistry())
mr->getValidationTracker().recordNetworkValidation(l->header().hash, l->header().seq);
app_.getOPs().updateLocalTx(*l);
app_.getSHAMapStore().onLedgerClosed(getValidatedLedger());
mLedgerHistory.validatedLedger(l, consensusHash);

View File

@@ -972,10 +972,53 @@ MetricsRegistry::registerAsyncGauges()
},
this);
// TODO(Task 7.15): Wire validation agreement gauge to ValidationTracker
// after rebase brings ValidationTracker from Phase 7 into this branch.
// Will observe: agreement_pct_1h, agreement_pct_24h, agreements_1h,
// missed_1h, agreements_24h, missed_24h from validationTracker_.
// --- Task 7.15: Validation agreement gauges ---
// Reports rolling-window agreement percentages and counts from
// ValidationTracker. reconcile() is called at the start of the
// callback so that pending ledger events are resolved before the
// window data is read (the callback fires every ~10 s from the
// PeriodicExportingMetricReader thread).
validationAgreementGauge_ = meter_->CreateDoubleObservableGauge(
"rippled_validation_agreement",
"Validation agreement percentages and counts (1h/24h windows)");
validationAgreementGauge_->AddCallback(
[](opentelemetry::metrics::ObserverResult result, void* state) {
auto* self = static_cast<MetricsRegistry*>(state);
try
{
// Reconcile pending events before reading window data.
self->validationTracker_.reconcile();
auto observe = [&](char const* name, double value) {
opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
opentelemetry::metrics::ObserverResultT<double>>>(result)
->Observe(value, {{"metric", name}});
};
observe("agreement_pct_1h", self->validationTracker_.agreementPct1h());
observe("agreement_pct_24h", self->validationTracker_.agreementPct24h());
observe(
"agreements_1h", static_cast<double>(self->validationTracker_.agreements1h()));
observe("missed_1h", static_cast<double>(self->validationTracker_.missed1h()));
observe(
"agreements_24h",
static_cast<double>(self->validationTracker_.agreements24h()));
observe("missed_24h", static_cast<double>(self->validationTracker_.missed24h()));
}
catch (...) // NOLINT(bugprone-empty-catch)
{
// Silently skip on error.
}
},
this);
// Note: validationAgreementsCounter_ and validationMissedCounter_ are
// created above but not currently incremented. The
// rippled_validation_agreement gauge already provides agreement and miss
// counts from ValidationTracker's rolling windows and lifetime totals.
// These counters are reserved for future use if a push-style counter
// integration with ValidationTracker is desired.
}
#endif // XRPL_ENABLE_TELEMETRY

View File

@@ -38,6 +38,8 @@
| +-- rippled_state_changes_total
| +-- rippled_jq_trans_overflow_total
|
+-- ValidationTracker (validation agreement tracker)
|
+-- Observable Gauges (async callbacks, polled by reader)
+-- Cache hit rates (SLE, ledger, AL)
+-- TreeNode / FullBelow sizes
@@ -54,6 +56,7 @@
+-- Ledger economy (fees, reserves, age)
+-- State tracking (mode value, time in state)
+-- Storage detail (NuDB sizes)
+-- Validation agreement (1h/24h pct, counts)
Control-flow for async gauges:
@@ -122,6 +125,8 @@
instrumentation site.
*/
#include <xrpld/telemetry/ValidationTracker.h>
#include <xrpl/beast/utility/Journal.h>
#include <cstdint>
@@ -268,6 +273,17 @@ public:
void
incrementJqTransOverflow();
/** Access the validation agreement tracker.
Used by consensus and ledger hooks to record our validations and
network validations so the tracker can compute agreement percentages.
@return Reference to the internal ValidationTracker instance.
*/
ValidationTracker&
getValidationTracker()
{
return validationTracker_;
}
private:
/// Master enable flag; when false all methods are no-ops.
bool const enabled_;
@@ -278,6 +294,12 @@ private:
/// Journal for logging.
beast::Journal const journal_;
/// Tracks validation agreement between this node and the network.
/// Lives outside the XRPL_ENABLE_TELEMETRY guard because it is
/// always safe to record events; the gauge callback simply won't
/// fire when telemetry is disabled.
ValidationTracker validationTracker_;
#ifdef XRPL_ENABLE_TELEMETRY
/// The SDK MeterProvider that owns the export pipeline.
std::shared_ptr<opentelemetry::sdk::metrics::MeterProvider> provider_;
@@ -354,6 +376,10 @@ private:
/// Observable gauge for storage detail metrics (NuDB on-disk size).
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
storageDetailGauge_;
/// Observable gauge for validation agreement metrics (1h/24h percentages
/// and counts from ValidationTracker).
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
validationAgreementGauge_;
// --- External dashboard parity counters (Task 7.14) ---
/// Counter: rippled_ledgers_closed_total — incremented each consensus round.