feat(telemetry): add reduce-relay efficiency gauge

The transaction reduce-relay metrics (selected vs suppressed peers,
feature-disabled peers, missing-tx frequency) were computed in OverlayImpl's
TxMetrics but only surfaced via the get_counts JSON RPC — invisible to
Prometheus/Grafana, despite being the central efficiency KPI for the feature.

Add an observable gauge xrpld_reduce_relay_metrics{metric} that reads
Overlay::txMetrics() and parses its rolling-average fields:
- selected_peers     (txr_selected_cnt)
- suppressed_peers   (txr_suppressed_cnt)
- not_enabled_peers  (txr_not_enabled_cnt)
- missing_tx_freq    (txr_missing_tx_freq)

The JSON values are decimal strings (std::to_string), parsed via std::stoll —
the same JSON-reading pattern as registerNodeStoreGauge. No new Overlay
accessor or core-interface change required.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-06-04 16:14:33 +01:00
parent 864ac729de
commit 9376aa7c88
2 changed files with 58 additions and 0 deletions

View File

@@ -436,6 +436,7 @@ MetricsRegistry::registerAsyncGauges()
registerDbMetricsGauge();
registerValidatorHealthGauge();
registerPeerQualityGauge();
registerReduceRelayGauge();
registerLedgerEconomyGauge();
registerStateTrackingGauge();
registerStorageDetailGauge();
@@ -1072,6 +1073,57 @@ MetricsRegistry::registerPeerQualityGauge()
this);
}
/**
 * Register the observable gauge reporting transaction reduce-relay
 * efficiency.
 *
 * Creates the int64 observable gauge `xrpld_reduce_relay_metrics` and attaches
 * a callback that reads Overlay::txMetrics() on every collection. One
 * observation is emitted per available field, distinguished by the `metric`
 * attribute:
 *   - selected_peers     (jss::txr_selected_cnt)
 *   - suppressed_peers   (jss::txr_suppressed_cnt)
 *   - not_enabled_peers  (jss::txr_not_enabled_cnt)
 *   - missing_tx_freq    (jss::txr_missing_tx_freq)
 *
 * A high suppressed:selected ratio indicates the feature is saving bandwidth;
 * a high not_enabled count indicates peers without the feature are forcing
 * full relay.
 *
 * The callback is best effort and never throws: a field that is absent, empty
 * or unparseable is skipped on its own without affecting the other fields,
 * and any other failure (e.g. the overlay not being ready yet) skips the
 * whole collection cycle.
 */
void
MetricsRegistry::registerReduceRelayGauge()
{
    reduceRelayGauge_ = meter_->CreateInt64ObservableGauge(
        "xrpld_reduce_relay_metrics", "Transaction reduce-relay efficiency metrics");
    reduceRelayGauge_->AddCallback(
        [](opentelemetry::metrics::ObserverResult result, void* state) {
            auto* self = static_cast<MetricsRegistry*>(state);
            // Do not touch application services once callbacks are detached.
            if (self->callbacksDetached_.load(std::memory_order_acquire))
                return;
            auto& app = self->app_;
            try
            {
                auto const tm = app.getOverlay().txMetrics();

                // Resolve the int64 observer once; it is reused for every
                // field emitted below.
                auto const& observer =
                    opentelemetry::nostd::get<opentelemetry::nostd::shared_ptr<
                        opentelemetry::metrics::ObserverResultT<int64_t>>>(result);
                auto observe = [&](char const* name, int64_t value) {
                    observer->Observe(value, {{"metric", name}});
                };

                // Each field is expected to be a decimal string. Conversion
                // failures are contained per field so that one bad value does
                // not suppress the remaining metrics.
                auto observeField = [&](auto const& field, char const* name) {
                    if (!tm.isMember(field))
                        return;
                    int64_t value = 0;
                    try
                    {
                        auto const s = tm[field].asString();
                        if (s.empty())
                            return;
                        value = static_cast<int64_t>(std::stoll(s));
                    }
                    catch (...)
                    {
                        // Not a string or not a number: skip only this field.
                        return;
                    }
                    observe(name, value);
                };
                observeField(jss::txr_selected_cnt, "selected_peers");
                observeField(jss::txr_suppressed_cnt, "suppressed_peers");
                observeField(jss::txr_not_enabled_cnt, "not_enabled_peers");
                observeField(jss::txr_missing_tx_freq, "missing_tx_freq");
            }
            catch (...)  // NOLINT(bugprone-empty-catch)
            {
                // Silently skip this cycle if services are not yet ready or
                // the observer cannot be used.
            }
        },
        this);
}
void
MetricsRegistry::registerLedgerEconomyGauge()
{

View File

@@ -478,6 +478,10 @@ private:
/// insane peer count, version spread, upgrade recommendation).
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
peerQualityGauge_;
/// Observable gauge for transaction reduce-relay efficiency (selected vs
/// suppressed peers, feature-disabled peers, missing-tx frequency).
/// Registered as the int64 instrument `xrpld_reduce_relay_metrics`; each
/// observation carries a `metric` attribute naming the value reported.
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
reduceRelayGauge_;
/// Observable gauge for ledger economy metrics (base fee, reserve,
/// reserve increment, ledger age).
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::ObservableInstrument>
@@ -563,6 +567,8 @@ private:
void
registerPeerQualityGauge(); // Task 7.10
void
registerReduceRelayGauge(); // Reduce-relay efficiency (xrpld_reduce_relay_metrics)
void
registerLedgerEconomyGauge(); // Task 7.11
void
registerStateTrackingGauge(); // Task 7.12