From 7397bbcdd208ff0c89fe8e877b1767d71a00257d Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Thu, 4 Jun 2026 10:53:45 +0100 Subject: [PATCH] feat(telemetry): add tx_type/ter_result/txq_status dashboard filters Adds template variables $tx_type, $ter_result, $txq_status to the Transaction Overview dashboard. All relevant panels now respect these filters, enabling operators to drill into specific transaction types or result codes. Changes: - Panel 2 renamed to "Transaction Processing Latency by Type" (now shows p95/p50 per tx_type instead of aggregate) - Panels 1,3,4,5,7,9,12 filter by $tx_type - Panel 10 filters by $tx_type and $ter_result - Panel 11 filters by $txq_status - Removed redundant "TX Processing Latency by Type (p95)" panel Co-Authored-By: Claude Opus 4.6 --- .../dashboards/transaction-overview.json | 132 ++++++++++-------- 1 file changed, 72 insertions(+), 60 deletions(-) diff --git a/docker/telemetry/grafana/dashboards/transaction-overview.json b/docker/telemetry/grafana/dashboards/transaction-overview.json index 0d4072ff09..fc18d31c77 100644 --- a/docker/telemetry/grafana/dashboards/transaction-overview.json +++ b/docker/telemetry/grafana/dashboards/transaction-overview.json @@ -29,14 +29,14 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m]))", + "expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m]))", "legendFormat": "tx.process / Sec [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\"}[5m]))", + "expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", 
span_name=\"tx.receive\", tx_type=~\"$tx_type\"}[5m]))", "legendFormat": "tx.receive / Sec [{{exported_instance}}]" } ], @@ -55,8 +55,8 @@ } }, { - "title": "Transaction Processing Latency", - "description": "p95 and p50 latency of transaction processing (tx.process span). Measures the time from when a transaction enters processTransaction() to completion. Computed via histogram_quantile() over the spanmetrics duration histogram with a 5m rate window.", + "title": "Transaction Processing Latency by Type", + "description": "p95 and p50 latency of transaction processing (tx.process span), broken down by transaction type and aggregated across the selected nodes. Computed via histogram_quantile() over the spanmetrics duration histogram with a 5m rate window. Use the tx_type dashboard variable to narrow the types shown.", "type": "timeseries", "gridPos": { "h": 8, @@ -68,6 +68,11 @@ "tooltip": { "mode": "multi", "sort": "desc" + }, + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "max", "lastNotNull"] } }, "targets": [ @@ -75,15 +80,15 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m])))", - "legendFormat": "P95 [{{exported_instance}}]" + "expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\", tx_type!=\"\"}[5m])))", + "legendFormat": "p95 {{tx_type}}" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m])))", - "legendFormat": "P50 [{{exported_instance}}]" + "expr": "histogram_quantile(0.50, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\", tx_type!=\"\"}[5m])))", + "legendFormat": "p50 {{tx_type}}" } ], "fieldConfig": { @@ -121,7 +126,7 @@ "datasource": { "type": "prometheus" }, - "expr": 
"sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))", + "expr": "sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m]))", "legendFormat": "Local = {{local}} [{{exported_instance}}]" } ] @@ -147,7 +152,7 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", exported_instance=~\"$node\"}[$__rate_interval]))", + "expr": "sum by (suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", tx_type=~\"$tx_type\", exported_instance=~\"$node\"}[$__rate_interval]))", "legendFormat": "Suppressed={{suppressed}} [{{exported_instance}}]" } ], @@ -189,7 +194,7 @@ "datasource": { "type": "prometheus" }, - "expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m])) by (le)", + "expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])) by (le)", "legendFormat": "{{le}}", "format": "heatmap" } @@ -262,7 +267,7 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\"}[5m]))", + "expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\", tx_type=~\"$tx_type\"}[5m]))", "legendFormat": "tx.receive / Sec [{{exported_instance}}]" } ], @@ -354,7 +359,7 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (tx_type) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type!=\"\"}[5m]))", + "expr": "sum by (tx_type) 
(rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\", tx_type!=\"\"}[5m]))", "legendFormat": "{{tx_type}}" } ], @@ -398,7 +403,7 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (tx_type, ter_result) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", ter_result!=\"\", ter_result!=\"tesSUCCESS\"}[5m]))", + "expr": "sum by (tx_type, ter_result) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\", ter_result=~\"$ter_result\", ter_result!=\"\", ter_result!=\"tesSUCCESS\"}[5m]))", "legendFormat": "{{tx_type}}: {{ter_result}}" } ], @@ -441,7 +446,7 @@ "datasource": { "type": "prometheus" }, - "expr": "sum by (txq_status) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"txq.accept_tx\", txq_status!=\"\"}[5m]))", + "expr": "sum by (txq_status) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"txq.accept_tx\", txq_status=~\"$txq_status\", txq_status!=\"\"}[5m]))", "legendFormat": "{{txq_status}}" } ], @@ -478,7 +483,7 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.transactor\", tx_type!=\"\"}[5m])))", + "expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.transactor\", tx_type=~\"$tx_type\", tx_type!=\"\"}[5m])))", "legendFormat": "p95 {{tx_type}}" } ], @@ -495,50 +500,6 @@ }, "overrides": [] } - }, - { - "title": "TX Processing Latency by Type (p95)", - "description": "Per-transaction-type processing latency (tx.process span duration). 
Shows which transaction types take longest to process end-to-end.", - "type": "timeseries", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 48 - }, - "options": { - "tooltip": { - "mode": "multi", - "sort": "desc" - }, - "legend": { - "displayMode": "table", - "placement": "right", - "calcs": ["mean", "max", "lastNotNull"] - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus" - }, - "expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type!=\"\"}[5m])))", - "legendFormat": "p95 {{tx_type}}" - } - ], - "fieldConfig": { - "defaults": { - "unit": "ms", - "custom": { - "axisLabel": "Latency (ms)", - "spanNulls": true, - "insertNulls": false, - "showPoints": "auto", - "pointSize": 3 - } - }, - "overrides": [] - } } ], "schemaVersion": 39, @@ -584,6 +545,57 @@ "multi": true, "refresh": 2, "sort": 1 + }, + { + "name": "tx_type", + "type": "query", + "datasource": { + "type": "prometheus" + }, + "query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\", tx_type!=\"\"}, tx_type)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "sort": 1 + }, + { + "name": "ter_result", + "type": "query", + "datasource": { + "type": "prometheus" + }, + "query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\", ter_result!=\"\"}, ter_result)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "sort": 1 + }, + { + "name": "txq_status", + "type": "query", + "datasource": { + "type": "prometheus" + }, + "query": "label_values(traces_span_metrics_calls_total{span_name=\"txq.accept_tx\", txq_status!=\"\"}, txq_status)", + "refresh": 2, + "includeAll": true, + "multi": true, + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + 
"sort": 1 } ] },