diff --git a/src/xrpld/app/ledger/LedgerHistory.cpp b/src/xrpld/app/ledger/LedgerHistory.cpp index 8520fc941f..77c542fb16 100644 --- a/src/xrpld/app/ledger/LedgerHistory.cpp +++ b/src/xrpld/app/ledger/LedgerHistory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -323,11 +324,25 @@ LedgerHistory::handleMismatch( auto builtLedger = getLedgerByHash(built); auto validLedger = getLedgerByHash(valid); + // Records the classified mismatch reason as a labeled OTel counter so + // fork diagnosis is a queryable time series, not just a log grep. Only + // the first reason per call is counted: the consensus-txset branch does + // not return, so the transaction comparison below would otherwise add a + // second reason and inflate the per-reason totals for one mismatch. + auto recordReason = [this, recorded = false](std::string_view reason) mutable { + if (recorded) + return; + recorded = true; + if (auto* mr = app_.getMetricsRegistry()) + mr->incrementLedgerHistoryMismatch(reason); + }; + if (!builtLedger || !validLedger) { JLOG(j_.error()) << "MISMATCH cannot be analyzed:" << " builtLedger: " << to_string(built) << " -> " << builtLedger << " validLedger: " << to_string(valid) << " -> " << validLedger; + recordReason("unknown"); return; } @@ -349,6 +364,7 @@ LedgerHistory::handleMismatch( if (builtLedger->header().parentHash != validLedger->header().parentHash) { JLOG(j_.error()) << "MISMATCH on prior ledger"; + recordReason("prior_ledger"); return; } @@ -356,6 +372,7 @@ LedgerHistory::handleMismatch( if (builtLedger->header().closeTime != validLedger->header().closeTime) { JLOG(j_.error()) << "MISMATCH on close time"; + recordReason("close_time"); return; } @@ -366,6 +383,7 @@ LedgerHistory::handleMismatch( JLOG(j_.error()) << "MISMATCH on consensus transaction set " << " built: " << to_string(*builtConsensusHash) << " validated: " << to_string(*validatedConsensusHash); + recordReason("consensus_txset"); } else JLOG(j_.error()) << "MISMATCH with same consensus transaction set: " @@ -379,10 +397,14 @@ LedgerHistory::handleMismatch( if (builtTx == validTx) { JLOG(j_.error()) << "MISMATCH with same " << builtTx.size() << " transactions"; + recordReason("same_txset_diff_result"); } else + { JLOG(j_.error()) << "MISMATCH with " << 
builtTx.size() << " built and " << validTx.size() << " valid transactions."; + recordReason("different_txset"); + } JLOG(j_.error()) << "built\n" << getJson({*builtLedger, {}}); JLOG(j_.error()) << "valid\n" << getJson({*validLedger, {}}); diff --git a/src/xrpld/telemetry/MetricsRegistry.cpp b/src/xrpld/telemetry/MetricsRegistry.cpp index b7f30b6004..ef1c4ead47 100644 --- a/src/xrpld/telemetry/MetricsRegistry.cpp +++ b/src/xrpld/telemetry/MetricsRegistry.cpp @@ -237,6 +237,9 @@ MetricsRegistry::start(std::string const& endpoint, std::string const& instanceI meter_->CreateUInt64Counter("xrpld_state_changes_total", "Total operating mode changes"); jqTransOverflowCounter_ = meter_->CreateUInt64Counter( "xrpld_jq_trans_overflow_total", "Total job queue transaction overflows"); + ledgerHistoryMismatchCounter_ = meter_->CreateUInt64Counter( + "xrpld_ledger_history_mismatch_total", + "Total built-vs-validated ledger mismatches by reason"); validationAgreementsCounter_ = meter_->CreateUInt64Counter( "xrpld_validation_agreements_total", "Total validation agreements"); validationMissedCounter_ = @@ -1326,4 +1329,13 @@ MetricsRegistry::incrementJqTransOverflow() #endif } +void +MetricsRegistry::incrementLedgerHistoryMismatch(std::string_view reason) +{ +#ifdef XRPL_ENABLE_TELEMETRY + if (enabled_ && ledgerHistoryMismatchCounter_) + ledgerHistoryMismatchCounter_->Add(1, {{"reason", std::string(reason)}}); +#endif +} + } // namespace xrpl::telemetry diff --git a/src/xrpld/telemetry/MetricsRegistry.h b/src/xrpld/telemetry/MetricsRegistry.h index 1d84932022..8ae9129758 100644 --- a/src/xrpld/telemetry/MetricsRegistry.h +++ b/src/xrpld/telemetry/MetricsRegistry.h @@ -349,6 +349,17 @@ public: void incrementJqTransOverflow(); + /** Increment the xrpld_ledger_history_mismatch_total counter for a reason. + Called from LedgerHistory::handleMismatch() once the mismatch has + been classified. The reason label turns fork diagnosis from a + log-grep into a queryable time series. 
+ @param reason Classified mismatch cause (e.g. "prior_ledger", + "close_time", "consensus_txset", "same_txset_diff_result", + "different_txset", "unknown"). + */ + void + incrementLedgerHistoryMismatch(std::string_view reason); + /** Access the validation agreement tracker. Used by consensus and ledger hooks to record our validations and network validations so the tracker can compute agreement percentages. @@ -483,6 +494,10 @@ private: /// Counter: xrpld_jq_trans_overflow_total — incremented on job queue transaction overflows. opentelemetry::nostd::unique_ptr> jqTransOverflowCounter_; + /// Counter: xrpld_ledger_history_mismatch_total{reason} — incremented per classified + /// built-vs-validated ledger mismatch. + opentelemetry::nostd::unique_ptr> + ledgerHistoryMismatchCounter_; /// Counter: xrpld_validation_agreements_total — incremented by ValidationTracker on /// agreement. opentelemetry::nostd::unique_ptr>