Merge branch 'pratik/otel-phase8-log-correlation' into pratik/otel-phase9-metric-gap-fill

This commit is contained in:
Pratik Mankawde
2026-06-04 10:53:56 +01:00

View File

@@ -29,14 +29,14 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m]))",
"legendFormat": "tx.process / Sec [{{exported_instance}}]"
},
{
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\", tx_type=~\"$tx_type\"}[5m]))",
"legendFormat": "tx.receive / Sec [{{exported_instance}}]"
}
],
@@ -55,8 +55,8 @@
}
},
{
"title": "Transaction Processing Latency",
"description": "p95 and p50 latency of transaction processing (tx.process span). Measures the time from when a transaction enters processTransaction() to completion. Computed via histogram_quantile() over the spanmetrics duration histogram with a 5m rate window.",
"title": "Transaction Processing Latency by Type",
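"description": "Per-transaction-type processing latency (p95 and p50) of the tx.process span, aggregated across the nodes selected in $node. Computed via histogram_quantile() over the spanmetrics duration histogram with a 5m rate window. Filter by transaction type with the $tx_type variable at the top of the dashboard.",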
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -68,6 +68,11 @@
"tooltip": {
"mode": "multi",
"sort": "desc"
},
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": ["mean", "max", "lastNotNull"]
}
},
"targets": [
@@ -75,15 +80,15 @@
"datasource": {
"type": "prometheus"
},
"expr": "histogram_quantile(0.95, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m])))",
"legendFormat": "P95 [{{exported_instance}}]"
"expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])))",
"legendFormat": "p95 {{tx_type}}"
},
{
"datasource": {
"type": "prometheus"
},
"expr": "histogram_quantile(0.50, sum by (le, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m])))",
"legendFormat": "P50 [{{exported_instance}}]"
"expr": "histogram_quantile(0.50, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])))",
"legendFormat": "p50 {{tx_type}}"
}
],
"fieldConfig": {
@@ -121,7 +126,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
"expr": "sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m]))",
"legendFormat": "Local = {{local}} [{{exported_instance}}]"
}
]
@@ -147,7 +152,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", exported_instance=~\"$node\"}[$__rate_interval]))",
"expr": "sum by (suppressed, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"tx.receive\", tx_type=~\"$tx_type\", exported_instance=~\"$node\"}[$__rate_interval]))",
"legendFormat": "Suppressed={{suppressed}} [{{exported_instance}}]"
}
],
@@ -189,7 +194,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\"}[5m])) by (le)",
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])) by (le)",
"legendFormat": "{{le}}",
"format": "heatmap"
}
@@ -262,7 +267,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.receive\", tx_type=~\"$tx_type\"}[5m]))",
"legendFormat": "tx.receive / Sec [{{exported_instance}}]"
}
],
@@ -354,7 +359,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (tx_type) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type!=\"\"}[5m]))",
"expr": "sum by (tx_type) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m]))",
"legendFormat": "{{tx_type}}"
}
],
@@ -398,7 +403,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (tx_type, ter_result) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", ter_result!=\"\", ter_result!=\"tesSUCCESS\"}[5m]))",
"expr": "sum by (tx_type, ter_result) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\", ter_result=~\"$ter_result\", ter_result!=\"tesSUCCESS\"}[5m]))",
"legendFormat": "{{tx_type}}: {{ter_result}}"
}
],
@@ -441,7 +446,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (txq_status) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"txq.accept_tx\", txq_status!=\"\"}[5m]))",
"expr": "sum by (txq_status) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=\"txq.accept_tx\", txq_status=~\"$txq_status\"}[5m]))",
"legendFormat": "{{txq_status}}"
}
],
@@ -478,7 +483,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.transactor\", tx_type!=\"\"}[5m])))",
"expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.transactor\", tx_type=~\"$tx_type\"}[5m])))",
"legendFormat": "p95 {{tx_type}}"
}
],
@@ -495,50 +500,6 @@
},
"overrides": []
}
},
{
"title": "TX Processing Latency by Type (p95)",
"description": "Per-transaction-type processing latency (tx.process span duration). Shows which transaction types take longest to process end-to-end.",
"type": "timeseries",
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 48
},
"options": {
"tooltip": {
"mode": "multi",
"sort": "desc"
},
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": ["mean", "max", "lastNotNull"]
}
},
"targets": [
{
"datasource": {
"type": "prometheus"
},
"expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type!=\"\"}[5m])))",
"legendFormat": "p95 {{tx_type}}"
}
],
"fieldConfig": {
"defaults": {
"unit": "ms",
"custom": {
"axisLabel": "Latency (ms)",
"spanNulls": true,
"insertNulls": false,
"showPoints": "auto",
"pointSize": 3
}
},
"overrides": []
}
}
],
"schemaVersion": 39,
@@ -584,6 +545,57 @@
"multi": true,
"refresh": 2,
"sort": 1
},
{
  "name": "tx_type",
  "type": "query",
  "datasource": { "type": "prometheus" },
  "query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\", tx_type!=\"\"}, tx_type)",
  "sort": 1,
  "refresh": 2,
  "multi": true,
  "includeAll": true,
  "allValue": ".*",
  "current": { "text": "All", "value": "$__all" }
},
{
  "name": "ter_result",
  "type": "query",
  "datasource": {
    "type": "prometheus"
  },
  "query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\", ter_result!=\"\"}, ter_result)",
  "refresh": 2,
  "includeAll": true,
  "multi": true,
  "allValue": ".+",
  "current": {
    "text": "All",
    "value": "$__all"
  },
  "sort": 1
},
{
  "name": "txq_status",
  "type": "query",
  "datasource": {
    "type": "prometheus"
  },
  "query": "label_values(traces_span_metrics_calls_total{span_name=\"txq.accept_tx\", txq_status!=\"\"}, txq_status)",
  "refresh": 2,
  "includeAll": true,
  "multi": true,
  "allValue": ".+",
  "current": {
    "text": "All",
    "value": "$__all"
  },
  "sort": 1
}
]
},