mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-05 17:56:49 +00:00
MetricsRegistry emits OTel SDK metrics with the xrpld_ prefix
(MetricsRegistry.cpp defines "xrpld_nodestore_state",
"xrpld_cache_metrics", etc.), but the Phase 9 dashboards and the
Step 10c integration-test assertions introduced in 892fee638a
queried the rippled_ prefix. Every Phase 9 panel and assertion
therefore rendered "No data" or failed on a live run, even though
the underlying series were being exported correctly.
Rename the rippled_ prefix to xrpld_ for every MetricsRegistry
metric in dashboards and the integration test:
- nodestore_state, cache_metrics, txq_metrics, load_factor_metrics,
object_count
- rpc_method_started_total / _finished_total / _errored_total /
_duration_us_bucket
- job_queued_total / _started_total / _finished_total /
_queued_duration_us_bucket / _running_duration_us_bucket
- peer_quality, server_info, validator_health, ledger_economy,
db_metrics, complete_ledgers, build_info, state_tracking
- ledgers_closed_total, validations_sent_total,
validations_checked_total, state_changes_total
- validation_agreement (ValidationTracker 1h/24h/7d windows)
Also add ValidationTracker window-gauge assertions to Step 10c of
integration-test.sh so the 1h/24h/7d agreement and miss counts are
checked alongside the other Phase 9 gauges.
The rippled_ prefix is preserved for beast::insight metrics
(rippled_LedgerMaster_*, rippled_Peer_Finder_*, rippled_total_*,
rippled_Overlay_*, rippled_State_Accounting_*, rippled_transactions_*,
rippled_proposals_*, rippled_validations_Messages_*) because those
flow through the StatsD-style OTelCollector configured with
`[insight] prefix=rippled` and remain on that prefix by design.
Verified against a live 6-node consensus network: all 22 Phase 9 +
ValidationTracker assertions now report 6+ series per metric.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1402 lines
38 KiB
JSON
1402 lines
38 KiB
JSON
{
|
|
"annotations": {
|
|
"list": []
|
|
},
|
|
"description": "Node health metrics from two sources: beast::insight System Metrics (rippled_ prefix; requires [insight] server=otel in rippled config) and OTel MetricsRegistry observable gauges (xrpld_ prefix).",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"title": "Validated Ledger Age",
|
|
"description": "Age of the most recently validated ledger in seconds. Sourced from the LedgerMaster.Validated_Ledger_Age gauge (LedgerMaster.h:373) which is updated every collection interval via the insight hook. Values above 20s indicate the node is falling behind the network.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_LedgerMaster_Validated_Ledger_Age{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Validated Age [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 10
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 20
|
|
}
|
|
]
|
|
},
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Published Ledger Age",
|
|
"description": "Age of the most recently published ledger in seconds. Sourced from the LedgerMaster.Published_Ledger_Age gauge (LedgerMaster.h:374). Published ledger age should track close to validated ledger age. A growing gap indicates publish pipeline backlog.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 0
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_LedgerMaster_Published_Ledger_Age{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Published Age [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 10
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 20
|
|
}
|
|
]
|
|
},
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Operating Mode Duration",
|
|
"description": "Cumulative time spent in each operating mode (Disconnected, Connected, Syncing, Tracking, Full). Sourced from State_Accounting.*_duration gauges (NetworkOPs.cpp:774-778) which report microseconds. A healthy node should spend the vast majority of time in Full mode.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Full_duration{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Full [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Tracking_duration{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Tracking [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Syncing_duration{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Syncing [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Connected_duration{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Connected [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Disconnected_duration{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Disconnected [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "µs",
|
|
"custom": {
|
|
"axisLabel": "Duration",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Operating Mode Transitions",
|
|
"description": "Count of transitions into each operating mode. Sourced from State_Accounting.*_transitions gauges (NetworkOPs.cpp:780-786). Frequent transitions out of Full mode indicate instability. Transitions to Disconnected or Syncing warrant investigation.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Full_transitions{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Full [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Tracking_transitions{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Tracking [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Syncing_transitions{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Syncing [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Connected_transitions{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Connected [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_State_Accounting_Disconnected_transitions{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Disconnected [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Transitions",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "I/O Latency",
|
|
"description": "P95 and P50 of the I/O service loop latency in milliseconds. Sourced from the ios_latency event (Application.cpp:438) which measures how long it takes for the io_context to process a timer callback. Values above 10ms are logged; above 500ms trigger warnings. High values indicate thread pool saturation or blocking operations.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_ios_latency_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P95 I/O Latency [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(rippled_ios_latency_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P50 I/O Latency [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ms",
|
|
"custom": {
|
|
"axisLabel": "Latency (ms)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Job Queue Depth",
|
|
"description": "Current number of jobs waiting in the job queue. Sourced from the job_count gauge (JobQueue.cpp:26). A sustained high value indicates the node cannot process work fast enough — common during ledger replay or heavy RPC load.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rippled_job_count{exported_instance=~\"$node\"}",
|
|
"legendFormat": "Job Queue Depth [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Jobs",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Ledger Fetch Rate",
|
|
"description": "Rate of ledger fetch requests initiated by the node. Sourced from the ledger_fetches counter (InboundLedgers.cpp:44) which increments each time the node requests a ledger from a peer. High rates indicate the node is catching up or missing ledgers.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rate(rippled_ledger_fetches_total{exported_instance=~\"$node\"}[5m])",
|
|
"legendFormat": "Fetches / Sec [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Ledger History Mismatches",
|
|
"description": "Rate of ledger history hash mismatches. Sourced from the ledger.history.mismatch counter (LedgerHistory.cpp:16) which increments when a built ledger hash does not match the expected validated hash. Non-zero values indicate consensus divergence or database corruption.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 24
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rate(rippled_ledger_history_mismatch_total{exported_instance=~\"$node\"}[5m])",
|
|
"legendFormat": "Mismatches / Sec [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 0.01
|
|
}
|
|
]
|
|
},
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "--- OTel: NodeStore I/O ---",
|
|
"type": "row",
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 32
|
|
},
|
|
"collapsed": false,
|
|
"panels": []
|
|
},
|
|
{
|
|
"title": "NodeStore Read/Write Totals",
|
|
"description": "Cumulative NodeStore read and write operation counts. Sourced from MetricsRegistry nodestore_state observable gauge with metric=node_reads_total, node_writes, node_reads_hit.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 33
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_total\"}",
|
|
"legendFormat": "Reads Total [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_reads_hit\"}",
|
|
"legendFormat": "Reads Hit (cache) [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"node_writes\"}",
|
|
"legendFormat": "Writes Total [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Operations",
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "NodeStore Write Load & Read Queue",
|
|
"description": "Instantaneous write load score and read queue depth. Sourced from MetricsRegistry nodestore_state observable gauge with metric=write_load, read_queue. High write load indicates backend pressure. High read queue indicates prefetch thread saturation.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 33
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"write_load\"}",
|
|
"legendFormat": "Write Load [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_nodestore_state{exported_instance=~\"$node\", metric=\"read_queue\"}",
|
|
"legendFormat": "Read Queue [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Count",
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 100
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 1000
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "--- OTel: Cache Hit Rates ---",
|
|
"type": "row",
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 41
|
|
},
|
|
"collapsed": false,
|
|
"panels": []
|
|
},
|
|
{
|
|
"title": "Cache Hit Rates",
|
|
"description": "Hit rates for SLE cache, Ledger cache, and AcceptedLedger cache. Values from 0.0 to 1.0. Low values indicate cache thrashing. Sourced from MetricsRegistry cache_metrics observable gauge.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 42
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"SLE_hit_rate\"}",
|
|
"legendFormat": "SLE Hit Rate [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"ledger_hit_rate\"}",
|
|
"legendFormat": "Ledger Hit Rate [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"AL_hit_rate\"}",
|
|
"legendFormat": "AcceptedLedger Hit Rate [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percentunit",
|
|
"min": 0,
|
|
"max": 1,
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10,
|
|
"axisLabel": "Hit Rate",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Cache Sizes",
|
|
"description": "TreeNode cache size, TreeNode track size, and FullBelow cache size. Sourced from MetricsRegistry cache_metrics observable gauge.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 42
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_cache_size\"}",
|
|
"legendFormat": "TreeNode Cache [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"treenode_track_size\"}",
|
|
"legendFormat": "TreeNode Track [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_cache_metrics{exported_instance=~\"$node\", metric=\"fullbelow_size\"}",
|
|
"legendFormat": "FullBelow [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Entries",
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "--- OTel: Object Instance Counts ---",
|
|
"type": "row",
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 50
|
|
},
|
|
"collapsed": false,
|
|
"panels": []
|
|
},
|
|
{
|
|
"title": "Object Instance Counts",
|
|
"description": "Live instance counts for key internal object types tracked by CountedObject<T>. Sourced from MetricsRegistry object_count observable gauge. High counts may indicate memory pressure or object leaks.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 51
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
},
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": ["last", "max"]
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "topk(15, xrpld_object_count{exported_instance=~\"$node\", type=~\"$type\"})",
|
|
"legendFormat": "{{type}} [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Instances",
|
|
"drawStyle": "line",
|
|
"lineWidth": 1,
|
|
"fillOpacity": 5
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "--- OTel: Server Info ---",
|
|
"type": "row",
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 59
|
|
},
|
|
"collapsed": false,
|
|
"panels": []
|
|
},
|
|
{
|
|
"title": "Server State",
|
|
"description": "Current operating mode: 0=DISCONNECTED, 1=CONNECTED, 2=SYNCING, 3=TRACKING, 4=FULL. Sourced from MetricsRegistry server_info observable gauge.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 60
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"server_state\"}",
|
|
"legendFormat": "State [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"mappings": [
|
|
{
|
|
"type": "value",
|
|
"options": { "0": { "text": "DISCONNECTED", "color": "red" } }
|
|
},
|
|
{
|
|
"type": "value",
|
|
"options": { "1": { "text": "CONNECTED", "color": "orange" } }
|
|
},
|
|
{
|
|
"type": "value",
|
|
"options": { "2": { "text": "SYNCING", "color": "yellow" } }
|
|
},
|
|
{
|
|
"type": "value",
|
|
"options": { "3": { "text": "TRACKING", "color": "blue" } }
|
|
},
|
|
{
|
|
"type": "value",
|
|
"options": { "4": { "text": "FULL", "color": "green" } }
|
|
}
|
|
],
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Uptime",
|
|
"description": "Time since server started, in seconds. Sourced from MetricsRegistry server_info observable gauge via UptimeClock.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 60
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"uptime\"}",
|
|
"legendFormat": "Uptime [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Peer Count",
|
|
"description": "Total connected peers (inbound + outbound). Sourced from MetricsRegistry server_info observable gauge via overlay().size().",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 60
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"peers\"}",
|
|
"legendFormat": "Peers [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Validated Ledger Seq",
|
|
"description": "Sequence number of the most recently validated ledger. Returns 0 before first validation. Sourced from MetricsRegistry server_info observable gauge.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 60
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"validated_ledger_seq\"}",
|
|
"legendFormat": "Seq [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Last Close Info",
|
|
"description": "Proposers and convergence time from the last closed consensus round. Sourced from MetricsRegistry server_info observable gauge via getConsensusInfo().",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 68
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"last_close_proposers\"}",
|
|
"legendFormat": "Proposers [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"last_close_converge_time_ms\"}",
|
|
"legendFormat": "Converge Time ms [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Build Version",
|
|
"description": "Build version info metric. Value is always 1; version string is in the 'version' label. Sourced from MetricsRegistry build_info observable gauge.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 68
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
},
|
|
"textMode": "name"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_build_info{exported_instance=~\"$node\"}",
|
|
"legendFormat": "v{{version}} [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "--- OTel: Complete Ledgers & DB ---",
|
|
"type": "row",
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 76
|
|
},
|
|
"collapsed": false,
|
|
"panels": []
|
|
},
|
|
{
|
|
"title": "Complete Ledger Ranges",
|
|
"description": "Start and end of each contiguous complete ledger range. Parsed from getLedgerMaster().getCompleteLedgers() string. Sourced from MetricsRegistry complete_ledgers observable gauge.",
|
|
"type": "table",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 77
|
|
},
|
|
"options": {
|
|
"showHeader": true
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_complete_ledgers{exported_instance=~\"$node\"}",
|
|
"legendFormat": "{{bound}} [range {{index}}] [{{exported_instance}}]",
|
|
"format": "table",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Database Sizes",
|
|
"description": "SQLite database sizes in KB (total, ledger, transaction). Sourced from MetricsRegistry db_metrics observable gauge via getRelationalDatabase().getKBUsed*().",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 77
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_total\"}",
|
|
"legendFormat": "Total KB [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_ledger\"}",
|
|
"legendFormat": "Ledger KB [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"db_kb_transaction\"}",
|
|
"legendFormat": "Transaction KB [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "deckbytes",
|
|
"custom": {
|
|
"axisLabel": "Size (KB)",
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Historical Fetch Rate",
|
|
"description": "Historical ledger fetches per minute. Sourced from MetricsRegistry db_metrics observable gauge via getInboundLedgers().fetchRate().",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 85
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_db_metrics{exported_instance=~\"$node\", metric=\"historical_perminute\"}",
|
|
"legendFormat": "Fetches/min [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Peer Disconnects (Resources)",
|
|
"description": "Cumulative count of peer disconnections due to resource limits. Sourced from MetricsRegistry server_info observable gauge via overlay().getPeerDisconnectCharges().",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 85
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_server_info{exported_instance=~\"$node\", metric=\"peer_disconnects_resources\"}",
|
|
"legendFormat": "Resource Disconnects [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "--- OTel: Ledger Economy ---",
|
|
"type": "row",
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 93
|
|
},
|
|
"collapsed": false,
|
|
"panels": []
|
|
},
|
|
{
|
|
"title": "Base Fee (drops)",
|
|
"description": "Current network base transaction fee in drops. Sourced from MetricsRegistry ledger_economy observable gauge. 1 XRP = 1,000,000 drops.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 94
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_ledger_economy{metric=\"base_fee_xrp\",exported_instance=~\"$node\"}",
|
|
"legendFormat": "Base Fee [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Reserve Base (drops)",
|
|
"description": "Current account reserve base in drops. The minimum XRP balance required to maintain an account on the ledger. Sourced from MetricsRegistry ledger_economy observable gauge.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 94
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_ledger_economy{metric=\"reserve_base_xrp\",exported_instance=~\"$node\"}",
|
|
"legendFormat": "Reserve Base [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Reserve Inc (drops)",
|
|
"description": "Current owner reserve increment in drops. Additional XRP required per owned object (trust lines, offers, etc.). Sourced from MetricsRegistry ledger_economy observable gauge.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 94
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_ledger_economy{metric=\"reserve_inc_xrp\",exported_instance=~\"$node\"}",
|
|
"legendFormat": "Reserve Inc [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Ledger Age",
|
|
"description": "Age of the current open ledger in seconds. Values growing beyond the expected ~4s close interval indicate the node is not closing ledgers on schedule. Sourced from MetricsRegistry ledger_economy observable gauge.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 94
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_ledger_economy{metric=\"ledger_age_seconds\",exported_instance=~\"$node\"}",
|
|
"legendFormat": "Ledger Age [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 8
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 20
|
|
}
|
|
]
|
|
},
|
|
"custom": {}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Transaction Rate",
|
|
"description": "Current transaction throughput rate. Shows the volume of transactions being processed per interval. Sourced from MetricsRegistry ledger_economy observable gauge with metric=transaction_rate.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 102
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "xrpld_ledger_economy{metric=\"transaction_rate\",exported_instance=~\"$node\"}",
|
|
"legendFormat": "Tx Rate [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"custom": {
|
|
"axisLabel": "Transactions",
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10,
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
}
|
|
],
|
|
"schemaVersion": 39,
|
|
"tags": ["rippled", "statsd", "otel", "node-health", "telemetry"],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"name": "node",
|
|
"label": "Node",
|
|
"description": "Filter by rippled node (service.instance.id)",
|
|
"type": "query",
|
|
"query": "label_values(rippled_LedgerMaster_Validated_Ledger_Age, exported_instance)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"includeAll": true,
|
|
"allValue": ".*",
|
|
"current": {
|
|
"text": "All",
|
|
"value": "$__all"
|
|
},
|
|
"multi": true,
|
|
"refresh": 2,
|
|
"sort": 1
|
|
},
|
|
{
|
|
"name": "type",
|
|
"label": "Object Type",
|
|
"description": "Filter by internal object type (CountedObject<T> class name)",
|
|
"type": "query",
|
|
"query": "label_values(xrpld_object_count, type)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"includeAll": true,
|
|
"allValue": ".*",
|
|
"current": {
|
|
"text": "All",
|
|
"value": "$__all"
|
|
},
|
|
"multi": true,
|
|
"refresh": 2,
|
|
"sort": 1
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"title": "Node Health (System Metrics)",
|
|
"uid": "rippled-system-node-health"
|
|
}
|