From 5601615952f97207cbfd0288a57f22bb548b33e9 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 13 May 2026 14:59:00 +0100 Subject: [PATCH] fix(telemetry): align Phase 9 dashboards and integration-test with xrpld_ metric prefix MetricsRegistry emits OTel SDK metrics with the xrpld_ prefix (MetricsRegistry.cpp defines "xrpld_nodestore_state", "xrpld_cache_metrics", etc.), but the Phase 9 dashboards and the Step 10c integration-test assertions introduced in 892fee638a queried the rippled_ prefix. On a live run, every Phase 9 panel therefore rendered "No data" and every Step 10c assertion failed, even though the underlying series were being exported correctly. Rename the rippled_ prefix to xrpld_ for every MetricsRegistry metric in dashboards and the integration test: - nodestore_state, cache_metrics, txq_metrics, load_factor_metrics, object_count - rpc_method_started_total / _finished_total / _errored_total / _duration_us_bucket - job_queued_total / _started_total / _finished_total / _queued_duration_us_bucket / _running_duration_us_bucket - peer_quality, server_info, validator_health, ledger_economy, db_metrics, complete_ledgers, build_info, state_tracking - ledgers_closed_total, validations_sent_total, validations_checked_total, state_changes_total - validation_agreement (ValidationTracker 1h/24h/7d windows) Also add ValidationTracker window-gauge assertions to Step 10c of integration-test.sh so the 1h/24h/7d agreement and miss counts are checked alongside the other Phase 9 gauges. The rippled_ prefix is preserved for beast::insight metrics (rippled_LedgerMaster_*, rippled_Peer_Finder_*, rippled_total_*, rippled_Overlay_*, rippled_State_Accounting_*, rippled_transactions_*, rippled_proposals_*, rippled_validations_Messages_*) because those flow through the StatsD-style OTelCollector configured with `[insight] prefix=rippled` and remain on that prefix by design. 
Verified against a live 6-node consensus network: all 22 Phase 9 + ValidationTracker assertions now report 6+ series per metric. Co-Authored-By: Claude Opus 4.7 --- .../dashboards/rippled-fee-market.json | 30 ++++----- .../grafana/dashboards/rippled-job-queue.json | 26 ++++---- .../dashboards/rippled-peer-quality.json | 12 ++-- .../grafana/dashboards/rippled-rpc-perf.json | 22 +++---- .../dashboards/rippled-validator-health.json | 32 +++++----- .../dashboards/system-node-health.json | 62 +++++++++---------- docker/telemetry/integration-test.sh | 41 ++++++++---- 7 files changed, 120 insertions(+), 105 deletions(-) diff --git a/docker/telemetry/grafana/dashboards/rippled-fee-market.json b/docker/telemetry/grafana/dashboards/rippled-fee-market.json index 85fb1aa102..09e777835e 100644 --- a/docker/telemetry/grafana/dashboards/rippled-fee-market.json +++ b/docker/telemetry/grafana/dashboards/rippled-fee-market.json @@ -30,14 +30,14 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_count\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_count\"}", "legendFormat": "Queue Depth [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_max_size\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_max_size\"}", "legendFormat": "Max Capacity [{{exported_instance}}]" } ], @@ -78,14 +78,14 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_in_ledger\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_in_ledger\"}", "legendFormat": "In Ledger [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_per_ledger\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_per_ledger\"}", 
"legendFormat": "Expected Per Ledger [{{exported_instance}}]" } ], @@ -126,28 +126,28 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_reference_fee_level\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_reference_fee_level\"}", "legendFormat": "Reference Fee Level [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_min_processing_fee_level\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_min_processing_fee_level\"}", "legendFormat": "Min Processing Fee Level [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_med_fee_level\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_med_fee_level\"}", "legendFormat": "Median Fee Level [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_txq_metrics{exported_instance=~\"$node\", metric=\"txq_open_ledger_fee_level\"}", + "expr": "xrpld_txq_metrics{exported_instance=~\"$node\", metric=\"txq_open_ledger_fee_level\"}", "legendFormat": "Open Ledger Fee Level [{{exported_instance}}]" } ], @@ -192,28 +192,28 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor\"}", "legendFormat": "Combined Load Factor [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_server\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_server\"}", "legendFormat": "Server [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": 
"rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_fee_escalation\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_fee_escalation\"}", "legendFormat": "Fee Escalation [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_fee_queue\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_fee_queue\"}", "legendFormat": "Fee Queue [{{exported_instance}}]" } ], @@ -270,21 +270,21 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_local\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_local\"}", "legendFormat": "Local [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_net\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_net\"}", "legendFormat": "Network [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_cluster\"}", + "expr": "xrpld_load_factor_metrics{exported_instance=~\"$node\", metric=\"load_factor_cluster\"}", "legendFormat": "Cluster [{{exported_instance}}]" } ], diff --git a/docker/telemetry/grafana/dashboards/rippled-job-queue.json b/docker/telemetry/grafana/dashboards/rippled-job-queue.json index e29b96f750..7536fac767 100644 --- a/docker/telemetry/grafana/dashboards/rippled-job-queue.json +++ b/docker/telemetry/grafana/dashboards/rippled-job-queue.json @@ -30,21 +30,21 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(rippled_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", + "expr": "sum by 
(exported_instance) (rate(xrpld_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", "legendFormat": "Queued/s [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(rippled_job_started_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", + "expr": "sum by (exported_instance) (rate(xrpld_job_started_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", "legendFormat": "Started/s [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(rippled_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", + "expr": "sum by (exported_instance) (rate(xrpld_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", "legendFormat": "Finished/s [{{exported_instance}}]" } ], @@ -94,7 +94,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, rate(rippled_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", + "expr": "topk(10, rate(xrpld_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", "legendFormat": "{{job_type}} [{{exported_instance}}]" } ], @@ -144,7 +144,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, rate(rippled_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", + "expr": "topk(10, rate(xrpld_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))", "legendFormat": "{{job_type}} [{{exported_instance}}]" } ], @@ -189,21 +189,21 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", + "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(xrpld_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", 
"legendFormat": "P50 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", + "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(xrpld_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", "legendFormat": "P95 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", + "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(xrpld_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", "legendFormat": "P99 [{{exported_instance}}]" } ], @@ -248,21 +248,21 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", + "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", "legendFormat": "P50 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", + "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", "legendFormat": "P95 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.99, sum by (le, exported_instance) 
(rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", + "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))", "legendFormat": "P99 [{{exported_instance}}]" } ], @@ -312,7 +312,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, histogram_quantile(0.95, sum by (le, job_type, exported_instance) (rate(rippled_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))))", + "expr": "topk(10, histogram_quantile(0.95, sum by (le, job_type, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))))", "legendFormat": "{{job_type}} [{{exported_instance}}]" } ], @@ -366,7 +366,7 @@ "label": "Job Type", "description": "Filter by job type", "type": "query", - "query": "label_values(rippled_job_queued_total, job_type)", + "query": "label_values(xrpld_job_queued_total, job_type)", "datasource": { "type": "prometheus", "uid": "prometheus" diff --git a/docker/telemetry/grafana/dashboards/rippled-peer-quality.json b/docker/telemetry/grafana/dashboards/rippled-peer-quality.json index 036fd3bb13..9d5dfbb0b8 100644 --- a/docker/telemetry/grafana/dashboards/rippled-peer-quality.json +++ b/docker/telemetry/grafana/dashboards/rippled-peer-quality.json @@ -30,7 +30,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_peer_quality{metric=\"peer_latency_p90_ms\",exported_instance=~\"$node\"}", + "expr": "xrpld_peer_quality{metric=\"peer_latency_p90_ms\",exported_instance=~\"$node\"}", "legendFormat": "P90 Latency [{{exported_instance}}]" } ], @@ -91,7 +91,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_peer_quality{metric=\"peers_insane_count\",exported_instance=~\"$node\"}", + "expr": "xrpld_peer_quality{metric=\"peers_insane_count\",exported_instance=~\"$node\"}", "legendFormat": 
"Insane Peers [{{exported_instance}}]" } ], @@ -140,7 +140,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_peer_quality{metric=\"peers_higher_version_pct\",exported_instance=~\"$node\"}", + "expr": "xrpld_peer_quality{metric=\"peers_higher_version_pct\",exported_instance=~\"$node\"}", "legendFormat": "Higher Version % [{{exported_instance}}]" } ], @@ -191,7 +191,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_peer_quality{metric=\"upgrade_recommended\",exported_instance=~\"$node\"}", + "expr": "xrpld_peer_quality{metric=\"upgrade_recommended\",exported_instance=~\"$node\"}", "legendFormat": "Upgrade [{{exported_instance}}]" } ], @@ -250,7 +250,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{metric=\"peer_disconnects_resources\",exported_instance=~\"$node\"}", + "expr": "xrpld_server_info{metric=\"peer_disconnects_resources\",exported_instance=~\"$node\"}", "legendFormat": "Disconnects [{{exported_instance}}]" } ], @@ -365,7 +365,7 @@ "label": "Node", "description": "Filter by rippled node (service.instance.id)", "type": "query", - "query": "label_values(rippled_peer_quality, exported_instance)", + "query": "label_values(xrpld_peer_quality, exported_instance)", "datasource": { "type": "prometheus", "uid": "prometheus" diff --git a/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json index 577ff69783..2c6d8594fb 100644 --- a/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json +++ b/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json @@ -30,21 +30,21 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(rippled_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", + "expr": "sum by (exported_instance) (rate(xrpld_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", "legendFormat": "Started/s [{{exported_instance}}]" }, { "datasource": { 
"type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(rippled_rpc_method_finished_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", + "expr": "sum by (exported_instance) (rate(xrpld_rpc_method_finished_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", "legendFormat": "Finished/s [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(rippled_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", + "expr": "sum by (exported_instance) (rate(xrpld_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", "legendFormat": "Errored/s [{{exported_instance}}]" } ], @@ -94,7 +94,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, rate(rippled_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", + "expr": "topk(10, rate(xrpld_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", "legendFormat": "{{method}} [{{exported_instance}}]" } ], @@ -144,7 +144,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, rate(rippled_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", + "expr": "topk(10, rate(xrpld_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))", "legendFormat": "{{method}} [{{exported_instance}}]" } ], @@ -189,21 +189,21 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))", + "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))", "legendFormat": "P50 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, exported_instance) 
(rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))", + "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))", "legendFormat": "P95 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))", + "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))", "legendFormat": "P99 [{{exported_instance}}]" } ], @@ -253,7 +253,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, histogram_quantile(0.95, sum by (le, method, exported_instance) (rate(rippled_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m]))))", + "expr": "topk(10, histogram_quantile(0.95, sum by (le, method, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m]))))", "legendFormat": "{{method}} [{{exported_instance}}]" } ], @@ -303,7 +303,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(10, rate(rippled_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) / (rate(rippled_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) > 0))", + "expr": "topk(10, rate(xrpld_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) / (rate(xrpld_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) > 0))", "legendFormat": "{{method}} [{{exported_instance}}]" } ], @@ -375,7 +375,7 @@ "label": "RPC Method", "description": "Filter by RPC method", "type": "query", - "query": "label_values(rippled_rpc_method_started_total, method)", + "query": 
"label_values(xrpld_rpc_method_started_total, method)", "datasource": { "type": "prometheus", "uid": "prometheus" diff --git a/docker/telemetry/grafana/dashboards/rippled-validator-health.json b/docker/telemetry/grafana/dashboards/rippled-validator-health.json index 37c00e62ed..1a4d453bee 100644 --- a/docker/telemetry/grafana/dashboards/rippled-validator-health.json +++ b/docker/telemetry/grafana/dashboards/rippled-validator-health.json @@ -42,7 +42,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validation_agreement{metric=\"agreement_pct_1h\",exported_instance=~\"$node\"}", + "expr": "xrpld_validation_agreement{metric=\"agreement_pct_1h\",exported_instance=~\"$node\"}", "legendFormat": "Agreement 1h [{{exported_instance}}]" } ], @@ -93,7 +93,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validation_agreement{metric=\"agreement_pct_24h\",exported_instance=~\"$node\"}", + "expr": "xrpld_validation_agreement{metric=\"agreement_pct_24h\",exported_instance=~\"$node\"}", "legendFormat": "Agreement 24h [{{exported_instance}}]" } ], @@ -146,14 +146,14 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validation_agreement{metric=\"agreements_1h\",exported_instance=~\"$node\"}", + "expr": "xrpld_validation_agreement{metric=\"agreements_1h\",exported_instance=~\"$node\"}", "legendFormat": "Agreements 1h [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_validation_agreement{metric=\"missed_1h\",exported_instance=~\"$node\"}", + "expr": "xrpld_validation_agreement{metric=\"missed_1h\",exported_instance=~\"$node\"}", "legendFormat": "Missed 1h [{{exported_instance}}]" } ], @@ -212,14 +212,14 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validation_agreement{metric=\"agreements_24h\",exported_instance=~\"$node\"}", + "expr": "xrpld_validation_agreement{metric=\"agreements_24h\",exported_instance=~\"$node\"}", "legendFormat": "Agreements 24h [{{exported_instance}}]" }, { 
"datasource": { "type": "prometheus" }, - "expr": "rippled_validation_agreement{metric=\"missed_24h\",exported_instance=~\"$node\"}", + "expr": "xrpld_validation_agreement{metric=\"missed_24h\",exported_instance=~\"$node\"}", "legendFormat": "Missed 24h [{{exported_instance}}]" } ], @@ -288,7 +288,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rate(rippled_validations_sent_total{exported_instance=~\"$node\"}[5m]) * 60", + "expr": "rate(xrpld_validations_sent_total{exported_instance=~\"$node\"}[5m]) * 60", "legendFormat": "Sent/min [{{exported_instance}}]" } ], @@ -337,7 +337,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rate(rippled_validations_checked_total{exported_instance=~\"$node\"}[5m]) * 60", + "expr": "rate(xrpld_validations_checked_total{exported_instance=~\"$node\"}[5m]) * 60", "legendFormat": "Checked/min [{{exported_instance}}]" } ], @@ -370,7 +370,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validator_health{metric=\"amendment_blocked\",exported_instance=~\"$node\"}", + "expr": "xrpld_validator_health{metric=\"amendment_blocked\",exported_instance=~\"$node\"}", "legendFormat": "Blocked [{{exported_instance}}]" } ], @@ -429,7 +429,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validator_health{metric=\"unl_expiry_days\",exported_instance=~\"$node\"}", + "expr": "xrpld_validator_health{metric=\"unl_expiry_days\",exported_instance=~\"$node\"}", "legendFormat": "UNL Expiry [{{exported_instance}}]" } ], @@ -490,7 +490,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_validator_health{metric=\"validation_quorum\",exported_instance=~\"$node\"}", + "expr": "xrpld_validator_health{metric=\"validation_quorum\",exported_instance=~\"$node\"}", "legendFormat": "Quorum [{{exported_instance}}]" } ], @@ -523,7 +523,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_state_tracking{metric=\"state_value\",exported_instance=~\"$node\"}", + "expr": 
"xrpld_state_tracking{metric=\"state_value\",exported_instance=~\"$node\"}", "legendFormat": "State [{{exported_instance}}]" } ], @@ -568,7 +568,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_state_tracking{metric=\"time_in_current_state_seconds\",exported_instance=~\"$node\"}", + "expr": "xrpld_state_tracking{metric=\"time_in_current_state_seconds\",exported_instance=~\"$node\"}", "legendFormat": "Time in State [{{exported_instance}}]" } ], @@ -601,7 +601,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rate(rippled_state_changes_total{exported_instance=~\"$node\"}[1h])", + "expr": "rate(xrpld_state_changes_total{exported_instance=~\"$node\"}[1h])", "legendFormat": "Changes/hr [{{exported_instance}}]" } ], @@ -650,7 +650,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rate(rippled_ledgers_closed_total{exported_instance=~\"$node\"}[5m]) * 60", + "expr": "rate(xrpld_ledgers_closed_total{exported_instance=~\"$node\"}[5m]) * 60", "legendFormat": "Closed/min [{{exported_instance}}]" } ], @@ -688,7 +688,7 @@ "label": "Node", "description": "Filter by rippled node (service.instance.id)", "type": "query", - "query": "label_values(rippled_validation_agreement, exported_instance)", + "query": "label_values(xrpld_validation_agreement, exported_instance)", "datasource": { "type": "prometheus", "uid": "prometheus" diff --git a/docker/telemetry/grafana/dashboards/system-node-health.json b/docker/telemetry/grafana/dashboards/system-node-health.json index 159f106344..fec7bec7cc 100644 --- a/docker/telemetry/grafana/dashboards/system-node-health.json +++ b/docker/telemetry/grafana/dashboards/system-node-health.json @@ -437,21 +437,21 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_total\"}", + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_total\"}", "legendFormat": "Reads Total [{{exported_instance}}]" }, { "datasource": { "type": 
"prometheus" }, - "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_hit\"}", + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_hit\"}", "legendFormat": "Reads Hit (cache) [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"node_writes\"}", + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_writes\"}", "legendFormat": "Writes Total [{{exported_instance}}]" } ], @@ -492,14 +492,14 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"write_load\"}", + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"write_load\"}", "legendFormat": "Write Load [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_nodestore_state{exported_instance=~\"$node\", metric=\"read_queue\"}", + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_queue\"}", "legendFormat": "Read Queue [{{exported_instance}}]" } ], @@ -568,21 +568,21 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"SLE_hit_rate\"}", + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"SLE_hit_rate\"}", "legendFormat": "SLE Hit Rate [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"ledger_hit_rate\"}", + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"ledger_hit_rate\"}", "legendFormat": "Ledger Hit Rate [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"AL_hit_rate\"}", + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"AL_hit_rate\"}", "legendFormat": "AcceptedLedger Hit Rate [{{exported_instance}}]" } ], @@ 
-629,21 +629,21 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_cache_size\"}", + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_cache_size\"}", "legendFormat": "TreeNode Cache [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_track_size\"}", + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_track_size\"}", "legendFormat": "TreeNode Track [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}", + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}", "legendFormat": "FullBelow [{{exported_instance}}]" } ], @@ -701,7 +701,7 @@ "datasource": { "type": "prometheus" }, - "expr": "topk(15, rippled_object_count{exported_instance=~\"$node\", type=~\"$type\"})", + "expr": "topk(15, xrpld_object_count{exported_instance=~\"$node\", type=~\"$type\"})", "legendFormat": "{{type}} [{{exported_instance}}]" } ], @@ -754,7 +754,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"server_state\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"server_state\"}", "legendFormat": "State [{{exported_instance}}]" } ], @@ -809,7 +809,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"uptime\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"uptime\"}", "legendFormat": "Uptime [{{exported_instance}}]" } ], @@ -842,7 +842,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"peers\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"peers\"}", "legendFormat": "Peers 
[{{exported_instance}}]" } ], @@ -875,7 +875,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"validated_ledger_seq\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"validated_ledger_seq\"}", "legendFormat": "Seq [{{exported_instance}}]" } ], @@ -908,14 +908,14 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"last_close_proposers\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"last_close_proposers\"}", "legendFormat": "Proposers [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"last_close_converge_time_ms\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"last_close_converge_time_ms\"}", "legendFormat": "Converge Time ms [{{exported_instance}}]" } ], @@ -956,7 +956,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_build_info{exported_instance=~\"$node\"}", + "expr": "xrpld_build_info{exported_instance=~\"$node\"}", "legendFormat": "v{{version}} [{{exported_instance}}]" } ], @@ -998,7 +998,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_complete_ledgers{exported_instance=~\"$node\"}", + "expr": "xrpld_complete_ledgers{exported_instance=~\"$node\"}", "legendFormat": "{{bound}} [range {{index}}] [{{exported_instance}}]", "format": "table", "instant": true @@ -1033,21 +1033,21 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_total\"}", + "expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_total\"}", "legendFormat": "Total KB [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_ledger\"}", + "expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_ledger\"}", 
"legendFormat": "Ledger KB [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "rippled_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_transaction\"}", + "expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_transaction\"}", "legendFormat": "Transaction KB [{{exported_instance}}]" } ], @@ -1088,7 +1088,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_db_metrics{exported_instance=~\"$node\", metric=\"historical_perminute\"}", + "expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"historical_perminute\"}", "legendFormat": "Fetches/min [{{exported_instance}}]" } ], @@ -1121,7 +1121,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_server_info{exported_instance=~\"$node\", metric=\"peer_disconnects_resources\"}", + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"peer_disconnects_resources\"}", "legendFormat": "Resource Disconnects [{{exported_instance}}]" } ], @@ -1173,7 +1173,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_ledger_economy{metric=\"base_fee_xrp\",exported_instance=~\"$node\"}", + "expr": "xrpld_ledger_economy{metric=\"base_fee_xrp\",exported_instance=~\"$node\"}", "legendFormat": "Base Fee [{{exported_instance}}]" } ], @@ -1206,7 +1206,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_ledger_economy{metric=\"reserve_base_xrp\",exported_instance=~\"$node\"}", + "expr": "xrpld_ledger_economy{metric=\"reserve_base_xrp\",exported_instance=~\"$node\"}", "legendFormat": "Reserve Base [{{exported_instance}}]" } ], @@ -1239,7 +1239,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_ledger_economy{metric=\"reserve_inc_xrp\",exported_instance=~\"$node\"}", + "expr": "xrpld_ledger_economy{metric=\"reserve_inc_xrp\",exported_instance=~\"$node\"}", "legendFormat": "Reserve Inc [{{exported_instance}}]" } ], @@ -1272,7 +1272,7 @@ "datasource": { "type": "prometheus" }, - "expr": 
"rippled_ledger_economy{metric=\"ledger_age_seconds\",exported_instance=~\"$node\"}", + "expr": "xrpld_ledger_economy{metric=\"ledger_age_seconds\",exported_instance=~\"$node\"}", "legendFormat": "Ledger Age [{{exported_instance}}]" } ], @@ -1321,7 +1321,7 @@ "datasource": { "type": "prometheus" }, - "expr": "rippled_ledger_economy{metric=\"transaction_rate\",exported_instance=~\"$node\"}", + "expr": "xrpld_ledger_economy{metric=\"transaction_rate\",exported_instance=~\"$node\"}", "legendFormat": "Tx Rate [{{exported_instance}}]" } ], @@ -1375,7 +1375,7 @@ "label": "Object Type", "description": "Filter by internal object type (CountedObject class name)", "type": "query", - "query": "label_values(rippled_object_count, type)", + "query": "label_values(xrpld_object_count, type)", "datasource": { "type": "prometheus", "uid": "prometheus" diff --git a/docker/telemetry/integration-test.sh b/docker/telemetry/integration-test.sh index edf5d012c4..e893146a0e 100755 --- a/docker/telemetry/integration-test.sh +++ b/docker/telemetry/integration-test.sh @@ -658,31 +658,46 @@ check_otel_metric() { } # Task 9.1: NodeStore I/O -check_otel_metric 'rippled_nodestore_state{metric="node_reads_total"}' -check_otel_metric 'rippled_nodestore_state{metric="write_load"}' +check_otel_metric 'xrpld_nodestore_state{metric="node_reads_total"}' +check_otel_metric 'xrpld_nodestore_state{metric="write_load"}' # Task 9.2: Cache hit rates -check_otel_metric 'rippled_cache_metrics{metric="SLE_hit_rate"}' -check_otel_metric 'rippled_cache_metrics{metric="treenode_cache_size"}' +check_otel_metric 'xrpld_cache_metrics{metric="SLE_hit_rate"}' +check_otel_metric 'xrpld_cache_metrics{metric="treenode_cache_size"}' # Task 9.3: TxQ metrics -check_otel_metric 'rippled_txq_metrics{metric="txq_count"}' -check_otel_metric 'rippled_txq_metrics{metric="txq_reference_fee_level"}' +check_otel_metric 'xrpld_txq_metrics{metric="txq_count"}' +check_otel_metric 'xrpld_txq_metrics{metric="txq_reference_fee_level"}' # 
Task 9.4: Per-RPC metrics -check_otel_metric "rippled_rpc_method_started_total" -check_otel_metric "rippled_rpc_method_finished_total" +check_otel_metric "xrpld_rpc_method_started_total" +check_otel_metric "xrpld_rpc_method_finished_total" # Task 9.5: Per-job metrics -check_otel_metric "rippled_job_queued_total" -check_otel_metric "rippled_job_finished_total" +check_otel_metric "xrpld_job_queued_total" +check_otel_metric "xrpld_job_finished_total" # Task 9.6: Counted object instances -check_otel_metric "rippled_object_count" +check_otel_metric "xrpld_object_count" # Task 9.7: Load factor breakdown -check_otel_metric 'rippled_load_factor_metrics{metric="load_factor"}' -check_otel_metric 'rippled_load_factor_metrics{metric="load_factor_server"}' +check_otel_metric 'xrpld_load_factor_metrics{metric="load_factor"}' +check_otel_metric 'xrpld_load_factor_metrics{metric="load_factor_server"}' + +# Task 7.15 / Phase 9: ValidationTracker rolling-window agreement gauge. +# MetricsRegistry::registerValidationAgreementGauge() publishes +# xrpld_validation_agreement with a `metric` label for each window +# (1h / 24h / 7d) plus the matching agreement/miss counts. The 7-day +# window matches the external xrpl-validator-dashboard parity target. +check_otel_metric 'xrpld_validation_agreement{metric="agreement_pct_1h"}' +check_otel_metric 'xrpld_validation_agreement{metric="agreement_pct_24h"}' +check_otel_metric 'xrpld_validation_agreement{metric="agreement_pct_7d"}' +check_otel_metric 'xrpld_validation_agreement{metric="agreements_1h"}' +check_otel_metric 'xrpld_validation_agreement{metric="missed_1h"}' +check_otel_metric 'xrpld_validation_agreement{metric="agreements_24h"}' +check_otel_metric 'xrpld_validation_agreement{metric="missed_24h"}' +check_otel_metric 'xrpld_validation_agreement{metric="agreements_7d"}' +check_otel_metric 'xrpld_validation_agreement{metric="missed_7d"}' # --------------------------------------------------------------------------- # Step 11: Summary