{ "annotations": { "list": [] }, "description": "RPC and pathfinding metrics from beast::insight System Metrics. Requires [insight] server=otel in rippled config.", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, "links": [], "panels": [ { "title": "RPC Request Rate (System Metrics)", "description": "Rate of RPC requests as counted by the beast::insight counter. Sourced from rpc.requests (ServerHandler.cpp:108) which increments on every HTTP and WebSocket RPC request. Compare with the span-based rpc.request rate in the RPC Performance dashboard for cross-validation.", "type": "stat", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "rate(rippled_rpc_requests_total{exported_instance=~\"$node\"}[5m])", "legendFormat": "Requests / Sec [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] } }, { "title": "RPC Response Time (System Metrics)", "description": "P95 and P50 of RPC response time from the beast::insight timer. Sourced from the rpc.time event (ServerHandler.cpp:110) which records elapsed milliseconds for each RPC response. This measures the full HTTP handler time, not just command execution. 
Compare with span-based rpc.request duration.", "type": "timeseries", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P95 Response Time [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P50 Response Time [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "ms", "custom": { "axisLabel": "Latency (ms)", "spanNulls": true, "insertNulls": false, "showPoints": "auto", "pointSize": 3 } }, "overrides": [] } }, { "title": "RPC Response Size", "description": "P95 and P50 of RPC response payload size in bytes. Sourced from the rpc.size event (ServerHandler.cpp:109) which records the byte length of each RPC JSON response. Large responses may indicate expensive queries (e.g. 
account_tx with many results) or API misuse.", "type": "timeseries", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_size_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P95 Response Size [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_rpc_size_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P50 Response Size [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "decbytes", "custom": { "axisLabel": "Size (Bytes)", "spanNulls": true, "insertNulls": false, "showPoints": "auto", "pointSize": 3 } }, "overrides": [] } }, { "title": "RPC Response Time Distribution", "description": "Distribution of RPC response times from the beast::insight timer showing P50, P90, P95, and P99 quantiles. Sourced from the rpc.time event (ServerHandler.cpp:110). 
Useful for detecting bimodal latency or long-tail requests.", "type": "timeseries", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P50 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.9, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P90 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P95 [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(rippled_rpc_time_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P99 [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "ms", "custom": { "axisLabel": "Latency (ms)", "spanNulls": true, "insertNulls": false, "showPoints": "auto", "pointSize": 3 } }, "overrides": [] } }, { "title": "Pathfinding Fast Duration", "description": "P95 and P50 of fast pathfinding execution time. Sourced from the pathfind_fast event (PathRequests.h:23) which records the duration of the fast pathfinding algorithm. 
Fast pathfinding uses a simplified search that trades accuracy for speed.", "type": "timeseries", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_pathfind_fast_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P95 Fast Pathfind [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_pathfind_fast_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P50 Fast Pathfind [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "ms", "custom": { "axisLabel": "Duration (ms)", "spanNulls": true, "insertNulls": false, "showPoints": "auto", "pointSize": 3 } }, "overrides": [] } }, { "title": "Pathfinding Full Duration", "description": "P95 and P50 of full pathfinding execution time. Sourced from the pathfind_full event (PathRequests.h:24) which records the duration of the exhaustive pathfinding search. 
Full pathfinding is more expensive and can take significantly longer than fast mode.", "type": "timeseries", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(rippled_pathfind_full_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P95 Full Pathfind [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, "expr": "histogram_quantile(0.5, sum by (le, exported_instance) (rate(rippled_pathfind_full_bucket{exported_instance=~\"$node\"}[5m])))", "legendFormat": "P50 Full Pathfind [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "ms", "custom": { "axisLabel": "Duration (ms)", "spanNulls": true, "insertNulls": false, "showPoints": "auto", "pointSize": 3 } }, "overrides": [] } }, { "title": "Resource Warnings Rate", "description": "Rate of resource warning events from the Resource Manager. Sourced from the warn meter (Logic.h:33) which increments when a consumer (peer or RPC client) exceeds the warning threshold for resource usage. A rising rate indicates aggressive clients that may need throttling. NOTE: This panel will show no data until the |m -> |c fix is applied in StatsDCollector.cpp:706 (Phase 6 Task 6.1).", "type": "stat", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "rate(rippled_warn_total{exported_instance=~\"$node\"}[5m])", "legendFormat": "Warnings / Sec [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "ops", "thresholds": { "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 0.1 }, { "color": "red", "value": 1 } ] } }, "overrides": [] } }, { "title": "Resource Drops Rate", "description": "Rate of resource drop events from the Resource Manager. 
Sourced from the drop meter (Logic.h:34) which increments when a consumer is disconnected or blocked due to excessive resource usage. Non-zero values mean the node is actively rejecting abusive connections. NOTE: This panel will show no data until the |m -> |c fix is applied in StatsDCollector.cpp:706 (Phase 6 Task 6.1).", "type": "stat", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus" }, "expr": "rate(rippled_drop_total{exported_instance=~\"$node\"}[5m])", "legendFormat": "Drops / Sec [{{exported_instance}}]" } ], "fieldConfig": { "defaults": { "unit": "ops", "thresholds": { "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 0.01 }, { "color": "red", "value": 0.1 } ] } }, "overrides": [] } } ], "schemaVersion": 39, "tags": ["rippled", "statsd", "rpc", "pathfinding", "telemetry"], "templating": { "list": [ { "name": "node", "label": "Node", "description": "Filter by rippled node (service.instance.id)", "type": "query", "query": "label_values(rippled_rpc_requests_total, exported_instance)", "datasource": { "type": "prometheus", "uid": "prometheus" }, "includeAll": true, "allValue": ".*", "current": { "text": "All", "value": "$__all" }, "multi": true, "refresh": 2, "sort": 1 } ] }, "time": { "from": "now-1h", "to": "now" }, "title": "RPC & Pathfinding (System Metrics)", "uid": "rippled-system-rpc" }