diff --git a/docker/telemetry/grafana/dashboards/rippled-job-queue.json b/docker/telemetry/grafana/dashboards/rippled-job-queue.json index dfc5cf490f..6888b240a5 100644 --- a/docker/telemetry/grafana/dashboards/rippled-job-queue.json +++ b/docker/telemetry/grafana/dashboards/rippled-job-queue.json @@ -335,6 +335,67 @@ }, "overrides": [] } + }, + { + "title": "Transaction Overflow Rate", + "description": "Rate of job queue transaction overflows per minute. Overflows occur when the job queue's transaction limit is exceeded, causing transactions to be dropped. Non-zero values indicate the node is under heavy transaction load.", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 32 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "rate(xrpld_jq_trans_overflow_total{exported_instance=~\"$node\"}[5m]) * 60", + "legendFormat": "Overflows/min [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "custom": { + "axisLabel": "Overflows / Min", + "drawStyle": "line", + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": true, + "insertNulls": false, + "showPoints": "auto", + "pointSize": 3 + }, + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + } } ], "schemaVersion": 39, diff --git a/docker/telemetry/grafana/dashboards/rippled-validator-health.json b/docker/telemetry/grafana/dashboards/rippled-validator-health.json index d5c6df4d18..70a5741aaa 100644 --- a/docker/telemetry/grafana/dashboards/rippled-validator-health.json +++ b/docker/telemetry/grafana/dashboards/rippled-validator-health.json @@ -463,6 +463,139 @@ "overrides": [] } }, + { + "title": "UNL Blocked", + "description": "Whether the node's UNL (Unique Node List) is blocked (1=blocked, 0=normal). A UNL-blocked node cannot determine validator trust and may stop participating in consensus.", + "type": "stat", + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 18 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_validator_health{metric=\"unl_blocked\",exported_instance=~\"$node\"}", + "legendFormat": "UNL Blocked [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "OK", + "color": "green" + } + } + }, + { + "type": "value", + "options": { + "1": { + "text": "BLOCKED", + "color": "red" + } + } + } + ], + "thresholds": { + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "custom": {} + }, + "overrides": [] + } + }, + { + "title": "Agreement/Missed Counters (Rate)", + "description": "Rate of cumulative validation agreements and misses per minute. These monotonic counters complement the rolling window percentages above.", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 18, + "x": 6, + "y": 18 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "rate(xrpld_validation_agreements_total{exported_instance=~\"$node\"}[5m]) * 60", + "legendFormat": "Agreements/min [{{exported_instance}}]" + }, + { + "datasource": { + "type": "prometheus" + }, + "expr": "rate(xrpld_validation_missed_total{exported_instance=~\"$node\"}[5m]) * 60", + "legendFormat": "Missed/min [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "axisLabel": "Per Minute", + "drawStyle": "line", + "lineWidth": 2, + "fillOpacity": 10, + "spanNulls": true, + "insertNulls": false, + "showPoints": "auto", + "pointSize": 3 + }, + "color": { + "mode": "palette-classic" + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "Missed.*" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "red" + } + } + ] + } + ] + } + }, { "title": "--- Server State & Consensus ---", "type": "row", @@ -470,7 +603,7 @@ "h": 1, "w": 24, "x": 0, - "y": 18 + "y": 26 }, "collapsed": false, "panels": [] @@ -483,7 +616,7 @@ "h": 8, "w": 6, "x": 0, - "y": 19 + "y": 27 }, "options": { "tooltip": { @@ -516,7 +649,7 @@ "h": 8, "w": 18, "x": 6, - "y": 19 + "y": 27 }, "options": { "tooltip": { @@ -561,7 +694,7 @@ "h": 8, "w": 8, "x": 0, - "y": 27 + "y": 35 }, "options": { "tooltip": { @@ -594,7 +727,7 @@ "h": 8, "w": 8, "x": 8, - "y": 27 + "y": 35 }, "options": { "tooltip": { @@ -643,7 +776,7 @@ "h": 8, "w": 8, "x": 16, - "y": 27 + "y": 35 }, "options": { "tooltip": { diff --git a/docker/telemetry/grafana/dashboards/system-node-health.json b/docker/telemetry/grafana/dashboards/system-node-health.json index 7827ef8bb9..14a655cd2f 100644 --- a/docker/telemetry/grafana/dashboards/system-node-health.json +++ b/docker/telemetry/grafana/dashboards/system-node-health.json @@ -535,6 +535,116 @@ "overrides": [] } }, + { + "title": "NodeStore Bytes Read/Written", + "description": "Cumulative bytes read and written by the NodeStore backend. Sourced from MetricsRegistry nodestore_state observable gauge with metric=node_written_bytes, node_read_bytes.", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_read_bytes\"}", + "legendFormat": "Bytes Read [{{exported_instance}}]" + }, + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_written_bytes\"}", + "legendFormat": "Bytes Written [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "decbytes", + "custom": { + "axisLabel": "Bytes", + "drawStyle": "line", + "lineWidth": 2, + "fillOpacity": 10 + }, + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + } + }, + { + "title": "NodeStore Read Threads & Duration", + "description": "Read thread utilization and cumulative read duration. read_threads_running/read_threads_total shows thread saturation. node_reads_duration_us tracks cumulative time spent in read I/O. read_request_bundle tracks bundled read operations.", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_threads_running\"}", + "legendFormat": "Read Threads Running [{{exported_instance}}]" + }, + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_threads_total\"}", + "legendFormat": "Read Threads Total [{{exported_instance}}]" + }, + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_request_bundle\"}", + "legendFormat": "Read Request Bundle [{{exported_instance}}]" + }, + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_duration_us\"}", + "legendFormat": "Read Duration (\u00b5s) [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "axisLabel": "Count / \u00b5s", + "drawStyle": "line", + "lineWidth": 2, + "fillOpacity": 10 + }, + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + } + }, { "title": "--- OTel: Cache Hit Rates ---", "type": "row", @@ -542,7 +652,7 @@ "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 49 }, "collapsed": false, "panels": [] @@ -555,7 +665,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 50 }, "options": { "tooltip": { @@ -610,13 +720,13 @@ }, { "title": "Cache Sizes", - "description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge.", + "description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge. Also includes AcceptedLedger (AL) cache size.", "type": "timeseries", "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 50 }, "options": { "tooltip": { @@ -645,6 +755,13 @@ }, "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}", "legendFormat": "FullBelow [{{exported_instance}}]" + }, + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"AL_size\"}", + "legendFormat": "AcceptedLedger Size [{{exported_instance}}]" } ], "fieldConfig": { @@ -670,7 +787,7 @@ "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 58 }, "collapsed": false, "panels": [] @@ -683,7 +800,7 @@ "h": 8, "w": 24, "x": 0, - "y": 51 + "y": 59 }, "options": { "tooltip": { @@ -728,7 +845,7 @@ "h": 1, "w": 24, "x": 0, - "y": 59 + "y": 67 }, "collapsed": false, "panels": [] @@ -741,7 +858,7 @@ "h": 8, "w": 6, "x": 0, - "y": 60 + "y": 68 }, "options": { "tooltip": { @@ -821,7 +938,7 @@ "h": 8, "w": 6, "x": 6, - "y": 60 + "y": 68 }, "options": { "tooltip": { @@ -854,7 +971,7 @@ "h": 8, "w": 6, "x": 12, - "y": 60 + "y": 68 }, "options": { "tooltip": { @@ -887,7 +1004,7 @@ "h": 8, "w": 6, "x": 18, - "y": 60 + "y": 68 }, "options": { "tooltip": { @@ -920,7 +1037,7 @@ "h": 8, "w": 12, "x": 0, - "y": 68 + "y": 76 }, "options": { "tooltip": { @@ -967,7 +1084,7 @@ "h": 8, "w": 12, "x": 12, - "y": 68 + "y": 76 }, "options": { "tooltip": { @@ -993,6 +1110,80 @@ "overrides": [] } }, + { + "title": "Current Ledger Index", + "description": "Current open ledger sequence number. The gap between this and validated_ledger_seq represents ledgers in flight.", + "type": "stat", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 84 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"ledger_current_index\"}", + "legendFormat": "Current Ledger [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": {} + }, + "overrides": [] + } + }, + { + "title": "NuDB Storage Size", + "description": "NuDB backend file size in bytes. Sourced from MetricsRegistry storage_detail observable gauge. Tracks database growth over time.", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 84 + }, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "expr": "xrpld_storage_detail{exported_instance=~\"$node\", metric=\"nudb_bytes\"}", + "legendFormat": "NuDB Size [{{exported_instance}}]" + } + ], + "fieldConfig": { + "defaults": { + "unit": "decbytes", + "custom": { + "axisLabel": "Size", + "drawStyle": "line", + "lineWidth": 2, + "fillOpacity": 10 + }, + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + } + }, { "title": "--- OTel: Complete Ledgers & DB ---", "type": "row", @@ -1000,7 +1191,7 @@ "h": 1, "w": 24, "x": 0, - "y": 76 + "y": 92 }, "collapsed": false, "panels": [] @@ -1013,7 +1204,7 @@ "h": 8, "w": 12, "x": 0, - "y": 77 + "y": 93 }, "options": { "showHeader": true @@ -1045,7 +1236,7 @@ "h": 8, "w": 12, "x": 12, - "y": 77 + "y": 93 }, "options": { "tooltip": { @@ -1100,7 +1291,7 @@ "h": 8, "w": 12, "x": 0, - "y": 85 + "y": 101 }, "options": { "tooltip": { @@ -1133,7 +1324,7 @@ "h": 8, "w": 12, "x": 12, - "y": 85 + "y": 101 }, "options": { "tooltip": { @@ -1172,7 +1363,7 @@ "h": 1, "w": 24, "x": 0, - "y": 93 + "y": 109 }, "collapsed": false, "panels": [] @@ -1185,7 +1376,7 @@ "h": 8, "w": 6, "x": 0, - "y": 94 + "y": 110 }, "options": { "tooltip": { @@ -1218,7 +1409,7 @@ "h": 8, "w": 6, "x": 6, - "y": 94 + "y": 110 }, "options": { "tooltip": { @@ -1251,7 +1442,7 @@ "h": 8, "w": 6, "x": 12, - "y": 94 + "y": 110 }, "options": { "tooltip": { @@ -1284,7 +1475,7 @@ "h": 8, "w": 6, "x": 18, - "y": 94 + "y": 110 }, "options": { "tooltip": { @@ -1333,7 +1524,7 @@ "h": 8, "w": 24, "x": 0, - "y": 102 + "y": 118 }, "options": { "tooltip": {