mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-05 01:37:00 +00:00
feat(telemetry): add xrpld_ledger_history_mismatch_total{reason} counter
LedgerHistory::handleMismatch() already classifies a built-vs-validated ledger mismatch (prior ledger, close time, consensus tx set, same/different tx set), but only bumped a single untyped beast::insight counter — the reason was dropped. Fork diagnosis was therefore a log-grep exercise.

Add a labeled OTel counter so the mismatch reason is a queryable time series:

- MetricsRegistry: new ledgerHistoryMismatchCounter_ + incrementLedgerHistoryMismatch(reason)
- LedgerHistory: record one reason per classification branch (unknown, prior_ledger, close_time, consensus_txset, same_txset_diff_result, different_txset). Reaches MetricsRegistry via the existing app_ reference.

The existing beast::insight mismatchCounter_ is left intact.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
#include <xrpld/app/ledger/LedgerToJson.h>
|
||||
#include <xrpld/app/main/Application.h>
|
||||
#include <xrpld/core/Config.h>
|
||||
#include <xrpld/telemetry/MetricsRegistry.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/base_uint.h>
|
||||
@@ -323,11 +324,19 @@ LedgerHistory::handleMismatch(
|
||||
auto builtLedger = getLedgerByHash(built);
|
||||
auto validLedger = getLedgerByHash(valid);
|
||||
|
||||
// Records the classified mismatch reason as a labeled OTel counter so
|
||||
// fork diagnosis is a queryable time series, not just a log grep.
|
||||
auto recordReason = [this](std::string_view reason) {
|
||||
if (auto* mr = app_.getMetricsRegistry())
|
||||
mr->incrementLedgerHistoryMismatch(reason);
|
||||
};
|
||||
|
||||
if (!builtLedger || !validLedger)
|
||||
{
|
||||
JLOG(j_.error()) << "MISMATCH cannot be analyzed:"
|
||||
<< " builtLedger: " << to_string(built) << " -> " << builtLedger
|
||||
<< " validLedger: " << to_string(valid) << " -> " << validLedger;
|
||||
recordReason("unknown");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -349,6 +358,7 @@ LedgerHistory::handleMismatch(
|
||||
if (builtLedger->header().parentHash != validLedger->header().parentHash)
|
||||
{
|
||||
JLOG(j_.error()) << "MISMATCH on prior ledger";
|
||||
recordReason("prior_ledger");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -356,6 +366,7 @@ LedgerHistory::handleMismatch(
|
||||
if (builtLedger->header().closeTime != validLedger->header().closeTime)
|
||||
{
|
||||
JLOG(j_.error()) << "MISMATCH on close time";
|
||||
recordReason("close_time");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -366,6 +377,7 @@ LedgerHistory::handleMismatch(
|
||||
JLOG(j_.error()) << "MISMATCH on consensus transaction set "
|
||||
<< " built: " << to_string(*builtConsensusHash)
|
||||
<< " validated: " << to_string(*validatedConsensusHash);
|
||||
recordReason("consensus_txset");
|
||||
}
|
||||
else
|
||||
JLOG(j_.error()) << "MISMATCH with same consensus transaction set: "
|
||||
@@ -379,10 +391,14 @@ LedgerHistory::handleMismatch(
|
||||
if (builtTx == validTx)
|
||||
{
|
||||
JLOG(j_.error()) << "MISMATCH with same " << builtTx.size() << " transactions";
|
||||
recordReason("same_txset_diff_result");
|
||||
}
|
||||
else
|
||||
{
|
||||
JLOG(j_.error()) << "MISMATCH with " << builtTx.size() << " built and " << validTx.size()
|
||||
<< " valid transactions.";
|
||||
recordReason("different_txset");
|
||||
}
|
||||
|
||||
JLOG(j_.error()) << "built\n" << getJson({*builtLedger, {}});
|
||||
JLOG(j_.error()) << "valid\n" << getJson({*validLedger, {}});
|
||||
|
||||
@@ -237,6 +237,9 @@ MetricsRegistry::start(std::string const& endpoint, std::string const& instanceI
|
||||
meter_->CreateUInt64Counter("xrpld_state_changes_total", "Total operating mode changes");
|
||||
jqTransOverflowCounter_ = meter_->CreateUInt64Counter(
|
||||
"xrpld_jq_trans_overflow_total", "Total job queue transaction overflows");
|
||||
ledgerHistoryMismatchCounter_ = meter_->CreateUInt64Counter(
|
||||
"xrpld_ledger_history_mismatch_total",
|
||||
"Total built-vs-validated ledger mismatches by reason");
|
||||
validationAgreementsCounter_ = meter_->CreateUInt64Counter(
|
||||
"xrpld_validation_agreements_total", "Total validation agreements");
|
||||
validationMissedCounter_ =
|
||||
@@ -1326,4 +1329,13 @@ MetricsRegistry::incrementJqTransOverflow()
|
||||
#endif
|
||||
}
|
||||
|
||||
// Adds 1 to ledgerHistoryMismatchCounter_, attaching `reason` as the value of
// the "reason" attribute on that data point.
//
// The whole body is compiled out unless XRPL_ENABLE_TELEMETRY is defined, and
// at runtime the call is a no-op when enabled_ is false or the counter
// pointer is null.
//
// @param reason Mismatch classification used as the "reason" attribute value.
void
|
||||
MetricsRegistry::incrementLedgerHistoryMismatch(std::string_view reason)
|
||||
{
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
// Skip when telemetry is disabled or the counter was never created.
if (enabled_ && ledgerHistoryMismatchCounter_)
|
||||
// The view is copied into a temporary std::string for the attribute value.
ledgerHistoryMismatchCounter_->Add(1, {{"reason", std::string(reason)}});
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace xrpl::telemetry
|
||||
|
||||
@@ -349,6 +349,17 @@ public:
|
||||
void
|
||||
incrementJqTransOverflow();
|
||||
|
||||
/** Increment the ledger_history_mismatch_total counter for a reason.
|
||||
Called from LedgerHistory::handleMismatch() once the mismatch has
|
||||
been classified. The reason label turns fork diagnosis from a
|
||||
log-grep into a queryable time series.
|
||||
@param reason Classified mismatch cause (e.g. "prior_ledger",
|
||||
"close_time", "consensus_txset", "same_txset_diff_result",
|
||||
"different_txset", "unknown").
|
||||
*/
|
||||
void
|
||||
incrementLedgerHistoryMismatch(std::string_view reason);
|
||||
|
||||
/** Access the validation agreement tracker.
|
||||
Used by consensus and ledger hooks to record our validations and
|
||||
network validations so the tracker can compute agreement percentages.
|
||||
@@ -483,6 +494,10 @@ private:
|
||||
/// Counter: xrpld_jq_trans_overflow_total — incremented on job queue transaction overflows.
|
||||
opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>>
|
||||
jqTransOverflowCounter_;
|
||||
/// Counter: xrpld_ledger_history_mismatch_total{reason} — incremented per classified
|
||||
/// built-vs-validated ledger mismatch.
|
||||
opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>>
|
||||
ledgerHistoryMismatchCounter_;
|
||||
/// Counter: xrpld_validation_agreements_total — incremented by ValidationTracker on
|
||||
/// agreement.
|
||||
opentelemetry::nostd::unique_ptr<opentelemetry::metrics::Counter<uint64_t>>
|
||||
|
||||
Reference in New Issue
Block a user