mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-06 02:07:07 +00:00
418 lines
13 KiB
JSON
418 lines
13 KiB
JSON
{
|
|
"annotations": {
|
|
"list": []
|
|
},
|
|
"description": "RPC and pathfinding metrics from beast::insight System Metrics. Requires [insight] server=otel in rippled config.",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"title": "RPC Request Rate (System Metrics)",
|
|
"description": "Rate of RPC requests as counted by the beast::insight counter. Sourced from rpc.requests (ServerHandler.cpp:108) which increments on every HTTP and WebSocket RPC request. Compare with the span-based rpc.request rate in the RPC Performance dashboard for cross-validation.",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rate(rippled_rpc_requests_total{exported_instance=~\"$node\"}[5m])",
|
|
"legendFormat": "Requests / Sec [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "reqps"
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "RPC Response Time (System Metrics)",
|
|
"description": "P95 and P50 of RPC response time from the beast::insight timer. Sourced from the rpc.time event (ServerHandler.cpp:110) which records elapsed milliseconds for each RPC response. This measures the full HTTP handler time, not just command execution. Compare with span-based rpc.request duration.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 0
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P95 Response Time [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P50 Response Time [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ms",
|
|
"custom": {
|
|
"axisLabel": "Latency (ms)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "RPC Response Size",
|
|
"description": "P95 and P50 of RPC response payload size in bytes. Sourced from the rpc.size event (ServerHandler.cpp:109) which records the byte length of each RPC JSON response. Large responses may indicate expensive queries (e.g. account_tx with many results) or API misuse.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_size_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P95 Response Size [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_rpc_size_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P50 Response Size [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "decbytes",
|
|
"custom": {
|
|
"axisLabel": "Size (Bytes)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "RPC Response Time Distribution",
|
|
"description": "Distribution of RPC response times from the beast::insight timer showing P50, P90, P95, and P99 quantiles. Sourced from the rpc.time event (ServerHandler.cpp:110). Useful for detecting bimodal latency or long-tail requests.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P50 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.9, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P90 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P95 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P99 [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ms",
|
|
"custom": {
|
|
"axisLabel": "Latency (ms)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Pathfinding Fast Duration",
|
|
"description": "P95 and P50 of fast pathfinding execution time. Sourced from the pathfind_fast event (PathRequests.h:23) which records the duration of the fast pathfinding algorithm. Fast pathfinding uses a simplified search that trades accuracy for speed.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_pathfind_fast_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P95 Fast Pathfind [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_pathfind_fast_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P50 Fast Pathfind [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ms",
|
|
"custom": {
|
|
"axisLabel": "Duration (ms)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Pathfinding Full Duration",
|
|
"description": "P95 and P50 of full pathfinding execution time. Sourced from the pathfind_full event (PathRequests.h:24) which records the duration of the exhaustive pathfinding search. Full pathfinding is more expensive and can take significantly longer than fast mode.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_pathfind_full_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P95 Full Pathfind [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_pathfind_full_bucket{exported_instance=~\"$node\"}[5m])))",
|
|
"legendFormat": "P50 Full Pathfind [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ms",
|
|
"custom": {
|
|
"axisLabel": "Duration (ms)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Resource Warnings Rate",
|
|
"description": "Rate of resource warning events from the Resource Manager. Sourced from the warn meter (Logic.h:33) which increments when a consumer (peer or RPC client) exceeds the warning threshold for resource usage. A rising rate indicates aggressive clients that may need throttling. NOTE: This panel will show no data until the |m -> |c fix is applied in System MetricsCollector.cpp:706 (Phase 6 Task 6.1).",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rate(rippled_warn_total{exported_instance=~\"$node\"}[5m])",
|
|
"legendFormat": "Warnings / Sec [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.1
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Resource Drops Rate",
|
|
"description": "Rate of resource drop events from the Resource Manager. Sourced from the drop meter (Logic.h:34) which increments when a consumer is disconnected or blocked due to excessive resource usage. Non-zero values mean the node is actively rejecting abusive connections. NOTE: This panel will show no data until the |m -> |c fix is applied in System MetricsCollector.cpp:706 (Phase 6 Task 6.1).",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 24
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "rate(rippled_drop_total{exported_instance=~\"$node\"}[5m])",
|
|
"legendFormat": "Drops / Sec [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"thresholds": {
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.01
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 0.1
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
}
|
|
],
|
|
"schemaVersion": 39,
|
|
"tags": ["rippled", "statsd", "rpc", "pathfinding", "telemetry"],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"name": "node",
|
|
"label": "Node",
|
|
"description": "Filter by rippled node (service.instance.id)",
|
|
"type": "query",
|
|
"query": "label_values(rippled_rpc_requests_total, exported_instance)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"includeAll": true,
|
|
"allValue": ".*",
|
|
"current": {
|
|
"text": "All",
|
|
"value": "$__all"
|
|
},
|
|
"multi": true,
|
|
"refresh": 2,
|
|
"sort": 1
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"title": "RPC & Pathfinding (System Metrics)",
|
|
"uid": "rippled-system-rpc"
|
|
}
|