rippled/docker/telemetry/grafana/dashboards/rippled-rpc-perf.json

{
  "annotations": {
    "list": []
  },
  "description": "Per-RPC-method performance: call rates, error rates, and latency distributions. Sourced from OTel MetricsRegistry synchronous counters and histograms (Phase 9).",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 1,
  "id": null,
  "links": [],
  "panels": [
    {
      "title": "RPC Call Rate (All Methods)",
      "description": "Per-node rate of RPC calls started, finished, and errored, summed across the selected methods. Computed as rate() over OTel counters.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "options": {
        "tooltip": {
          "mode": "multi",
          "sort": "desc"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "sum by (exported_instance) (rate(xrpld_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
          "legendFormat": "Started/s [{{exported_instance}}]"
        },
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "sum by (exported_instance) (rate(xrpld_rpc_method_finished_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
          "legendFormat": "Finished/s [{{exported_instance}}]"
        },
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "sum by (exported_instance) (rate(xrpld_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
          "legendFormat": "Errored/s [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "ops",
          "custom": {
            "drawStyle": "line",
            "lineWidth": 2,
            "fillOpacity": 10,
            "axisLabel": "Operations / Sec",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
            "pointSize": 3
          },
          "color": {
            "mode": "palette-classic"
          }
        },
        "overrides": []
      }
    },
    {
      "title": "Per-Method Call Rate (Top 10)",
      "description": "Per-method RPC call rate, showing the 10 most active method/node series. Useful for identifying hot paths.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "options": {
        "tooltip": {
          "mode": "multi",
          "sort": "desc"
        },
        "legend": {
          "displayMode": "table",
          "placement": "right",
          "calcs": ["mean", "max"]
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "topk(10, rate(xrpld_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
          "legendFormat": "{{method}} [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "ops",
          "custom": {
            "drawStyle": "line",
            "lineWidth": 1,
            "fillOpacity": 5,
            "axisLabel": "Operations / Sec",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
            "pointSize": 3
          },
          "color": {
            "mode": "palette-classic"
          }
        },
        "overrides": []
      }
    },
    {
      "title": "Per-Method Error Rate (Top 10)",
      "description": "Per-method RPC error rate. Non-zero values warrant investigation. Common culprits: invalid parameters, resource exhaustion.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "options": {
        "tooltip": {
          "mode": "multi",
          "sort": "desc"
        },
        "legend": {
          "displayMode": "table",
          "placement": "right",
          "calcs": ["mean", "max"]
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "topk(10, rate(xrpld_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]))",
          "legendFormat": "{{method}} [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "ops",
          "custom": {
            "drawStyle": "line",
            "lineWidth": 1,
            "fillOpacity": 5,
            "axisLabel": "Operations / Sec",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
            "pointSize": 3
          },
          "color": {
            "mode": "palette-classic"
          }
        },
        "overrides": []
      }
    },
    {
      "title": "RPC Latency (P50, P95, P99) - All Methods",
      "description": "Histogram quantiles for RPC execution time, per node, aggregated across the selected methods. Sourced from the xrpld_rpc_method_duration_us histogram.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 16
      },
      "options": {
        "tooltip": {
          "mode": "multi",
          "sort": "desc"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))",
          "legendFormat": "P50 [{{exported_instance}}]"
        },
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))",
          "legendFormat": "P95 [{{exported_instance}}]"
        },
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m])))",
          "legendFormat": "P99 [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "us",
          "custom": {
            "drawStyle": "line",
            "lineWidth": 2,
            "fillOpacity": 5,
            "axisLabel": "Duration (μs)",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
            "pointSize": 3
          },
          "color": {
            "mode": "palette-classic"
          }
        },
        "overrides": []
      }
    },
    {
      "title": "Per-Method Latency P95 (Top 10 Slowest)",
      "description": "95th percentile execution time per method. Identifies the slowest RPC endpoints.",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 16
      },
      "options": {
        "tooltip": {
          "mode": "multi",
          "sort": "desc"
        },
        "legend": {
          "displayMode": "table",
          "placement": "right",
          "calcs": ["mean", "max"]
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "topk(10, histogram_quantile(0.95, sum by (le, method, exported_instance) (rate(xrpld_rpc_method_duration_us_bucket{exported_instance=~\"$node\", method=~\"$method\"}[5m]))))",
          "legendFormat": "{{method}} [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "us",
          "custom": {
            "drawStyle": "line",
            "lineWidth": 1,
            "fillOpacity": 5,
            "axisLabel": "Duration (μs)",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
            "pointSize": 3
          },
          "color": {
            "mode": "palette-classic"
          }
        },
        "overrides": []
      }
    },
    {
      "title": "RPC Error Ratio by Method",
      "description": "Error ratio (errors / total started) per method and node, limited to the 10 highest ratios. Values above 0.05 (5%) warrant investigation; threshold lines mark 5% (yellow) and 25% (red).",
      "type": "timeseries",
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 24
      },
      "options": {
        "tooltip": {
          "mode": "multi",
          "sort": "desc"
        },
        "legend": {
          "displayMode": "table",
          "placement": "right",
          "calcs": ["mean", "max"]
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus"
          },
          "expr": "topk(10, rate(xrpld_rpc_method_errored_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) / (rate(xrpld_rpc_method_started_total{exported_instance=~\"$node\", method=~\"$method\"}[5m]) > 0))",
          "legendFormat": "{{method}} [{{exported_instance}}]"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "min": 0,
          "max": 1,
          "custom": {
            "drawStyle": "line",
            "lineWidth": 1,
            "fillOpacity": 5,
            "axisLabel": "Error Ratio",
            "spanNulls": true,
            "insertNulls": false,
            "showPoints": "auto",
            "pointSize": 3,
            "thresholdsStyle": {
              "mode": "line"
            }
          },
          "color": {
            "mode": "palette-classic"
          },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 0.05
              },
              {
                "color": "red",
                "value": 0.25
              }
            ]
          }
        },
        "overrides": []
      }
    }
  ],
  "schemaVersion": 39,
  "tags": ["rippled", "otel", "rpc"],
  "templating": {
    "list": [
      {
        "name": "node",
        "label": "Node",
        "description": "Filter by rippled node (service.instance.id)",
        "type": "query",
        "query": "label_values(exported_instance)",
        "datasource": {
          "type": "prometheus",
          "uid": "prometheus"
        },
        "includeAll": true,
        "allValue": ".*",
        "current": {
          "text": "All",
          "value": "$__all"
        },
        "multi": true,
        "refresh": 2,
        "sort": 1
      },
      {
        "name": "method",
        "label": "RPC Method",
        "description": "Filter by RPC method (only methods reported by the selected nodes are listed)",
        "type": "query",
        "query": "label_values(xrpld_rpc_method_started_total{exported_instance=~\"$node\"}, method)",
        "datasource": {
          "type": "prometheus",
          "uid": "prometheus"
        },
        "includeAll": true,
        "allValue": ".*",
        "current": {
          "text": "All",
          "value": "$__all"
        },
        "multi": true,
        "refresh": 2,
        "sort": 1
      }
    ]
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "browser",
  "title": "RPC Performance (OTel)",
  "uid": "rippled-rpc-perf",
  "version": 1
}