mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-05 17:56:49 +00:00
MetricsRegistry emits OTel SDK metrics with the xrpld_ prefix
(MetricsRegistry.cpp defines "xrpld_nodestore_state",
"xrpld_cache_metrics", etc.), but the Phase 9 dashboards and the
Step 10c integration-test assertions introduced in 892fee638a
queried the rippled_ prefix. Every Phase 9 panel and assertion
therefore rendered "No data" or failed on a live run, even though
the underlying series were being exported correctly.
Rename the rippled_ prefix to xrpld_ for every MetricsRegistry
metric in dashboards and the integration test:
- nodestore_state, cache_metrics, txq_metrics, load_factor_metrics,
object_count
- rpc_method_started_total / _finished_total / _errored_total /
_duration_us_bucket
- job_queued_total / _started_total / _finished_total /
_queued_duration_us_bucket / _running_duration_us_bucket
- peer_quality, server_info, validator_health, ledger_economy,
db_metrics, complete_ledgers, build_info, state_tracking
- ledgers_closed_total, validations_sent_total,
validations_checked_total, state_changes_total
- validation_agreement (ValidationTracker 1h/24h/7d windows)
Also add ValidationTracker window-gauge assertions to Step 10c of
integration-test.sh so the 1h/24h/7d agreement and miss counts are
checked alongside the other Phase 9 gauges.
The rippled_ prefix is preserved for beast::insight metrics
(rippled_LedgerMaster_*, rippled_Peer_Finder_*, rippled_total_*,
rippled_Overlay_*, rippled_State_Accounting_*, rippled_transactions_*,
rippled_proposals_*, rippled_validations_Messages_*) because those
flow through the StatsD-style OTelCollector configured with
`[insight] prefix=rippled` and remain on that prefix by design.
Verified against a live 6-node consensus network: all 22 Phase 9 +
ValidationTracker assertions now report 6+ series per metric.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
396 lines
11 KiB
JSON
396 lines
11 KiB
JSON
{
|
|
"annotations": {
|
|
"list": []
|
|
},
|
|
"description": "Job queue analysis: per-job-type throughput rates, queue wait times, and execution times. Sourced from OTel MetricsRegistry synchronous counters and histograms (Phase 9).",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"title": "Job Throughput Rate (Per Second)",
|
|
"description": "Per-node rate of jobs queued, started, and finished, summed across the selected job types. Computed as rate() over the OTel counter values using a 5m window. High queue rates with low finish rates indicate backlog.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "sum by (exported_instance) (rate(xrpld_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
|
|
"legendFormat": "Queued/s [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "sum by (exported_instance) (rate(xrpld_job_started_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
|
|
"legendFormat": "Started/s [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "sum by (exported_instance) (rate(xrpld_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
|
|
"legendFormat": "Finished/s [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10,
|
|
"axisLabel": "Operations / Sec",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Per-Job-Type Queued Rate",
|
|
"description": "Top 10 series by queued rate, broken down by the job_type label and node. Identifies which job types contribute most to queue activity.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
},
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": ["mean", "max"]
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "topk(10, rate(xrpld_job_queued_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
|
|
"legendFormat": "{{job_type}} [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 1,
|
|
"fillOpacity": 5,
|
|
"axisLabel": "Operations / Sec",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Per-Job-Type Finish Rate",
|
|
"description": "Top 10 series by finish rate, broken down by the job_type label and node. Compare with the queued rate to identify backlog per type.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
},
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": ["mean", "max"]
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "topk(10, rate(xrpld_job_finished_total{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))",
|
|
"legendFormat": "{{job_type}} [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 1,
|
|
"fillOpacity": 5,
|
|
"axisLabel": "Operations / Sec",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Job Queue Wait Time (P50, P95, P99)",
|
|
"description": "Histogram quantiles for time jobs spend waiting in the queue before execution starts. High values indicate thread pool saturation.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(xrpld_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
|
|
"legendFormat": "P50 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(xrpld_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
|
|
"legendFormat": "P95 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(xrpld_job_queued_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
|
|
"legendFormat": "P99 [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "us",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 5,
|
|
"axisLabel": "Duration (μs)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Job Execution Time (P50, P95, P99)",
|
|
"description": "Histogram quantiles for actual job execution time. High values indicate expensive operations or resource contention.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
|
|
"legendFormat": "P50 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
|
|
"legendFormat": "P95 [{{exported_instance}}]"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "histogram_quantile(0.99, sum by (le, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m])))",
|
|
"legendFormat": "P99 [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "us",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 5,
|
|
"axisLabel": "Duration (μs)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
},
|
|
{
|
|
"title": "Per-Job-Type Execution Time (P95)",
|
|
"description": "95th percentile execution time per job type and node, limited to the 10 highest series. Identifies the slowest job types.",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"options": {
|
|
"tooltip": {
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
},
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": ["mean", "max"]
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus"
|
|
},
|
|
"expr": "topk(10, histogram_quantile(0.95, sum by (le, job_type, exported_instance) (rate(xrpld_job_running_duration_us_bucket{exported_instance=~\"$node\", job_type=~\"$job_type\"}[5m]))))",
|
|
"legendFormat": "{{job_type}} [{{exported_instance}}]"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "us",
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineWidth": 1,
|
|
"fillOpacity": 5,
|
|
"axisLabel": "Duration (μs)",
|
|
"spanNulls": true,
|
|
"insertNulls": false,
|
|
"showPoints": "auto",
|
|
"pointSize": 3
|
|
},
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
}
|
|
}
|
|
],
|
|
"schemaVersion": 39,
|
|
"tags": ["rippled", "otel", "job-queue"],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"name": "node",
|
|
"label": "Node",
|
|
"description": "Filter by rippled node (exported_instance label, from service.instance.id)",
|
|
"type": "query",
|
|
"query": "label_values(exported_instance)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"includeAll": true,
|
|
"allValue": ".*",
|
|
"current": {
|
|
"text": "All",
|
|
"value": "$__all"
|
|
},
|
|
"multi": true,
|
|
"refresh": 2,
|
|
"sort": 1
|
|
},
|
|
{
|
|
"name": "job_type",
|
|
"label": "Job Type",
|
|
"description": "Filter by job type",
|
|
"type": "query",
|
|
"query": "label_values(xrpld_job_queued_total, job_type)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "prometheus"
|
|
},
|
|
"includeAll": true,
|
|
"allValue": ".*",
|
|
"current": {
|
|
"text": "All",
|
|
"value": "$__all"
|
|
},
|
|
"multi": true,
|
|
"refresh": 2,
|
|
"sort": 1
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "browser",
|
|
"title": "Job Queue Analysis",
|
|
"uid": "rippled-job-queue",
|
|
"version": 1
|
|
}
|