Merge branch 'pratik/otel-phase6-statsd' into pratik/otel-phase7-native-metrics

# Conflicts: # OpenTelemetryPlan/09-data-collection-reference.md
2026-07-23 23:20:33 +00:00 · 2026-06-05 12:48:31 +01:00
parent 0a800069bf 3167a49f41
commit 283fbaa54f
12 changed files with 574 additions and 76 deletions
--- a/.github/scripts/levelization/results/ordering.txt
+++ b/.github/scripts/levelization/results/ordering.txt
@@ -242,6 +242,7 @@ xrpl.tx > xrpl.basics
 xrpl.tx > xrpl.core
 xrpl.tx > xrpl.ledger
 xrpl.tx > xrpl.protocol
+xrpl.tx > xrpl.telemetry
 xrpld.app > test.unit_test
 xrpld.app > xrpl.basics
 xrpld.app > xrpl.core
--- a/OpenTelemetryPlan/09-data-collection-reference.md
+++ b/OpenTelemetryPlan/09-data-collection-reference.md
@@ -99,13 +99,23 @@ Controlled by `trace_rpc=1` in `[telemetry]` config.

 Controlled by `trace_transactions=1` in `[telemetry]` config.

-| Span Name    | Parent         | Source File     | Description                                                       |
-| ------------ | -------------- | --------------- | ----------------------------------------------------------------- |
-| `tx.process` | —              | NetworkOPs.cpp  | Transaction submission entry point (local or peer-relayed)        |
-| `tx.receive` | —              | PeerImp.cpp     | Raw transaction received from peer overlay (before deduplication) |
-| `tx.apply`   | `ledger.build` | BuildLedger.cpp | Transaction set applied to new ledger during consensus            |
+| Span Name       | Parent         | Source File     | Description                                                       |
+| --------------- | -------------- | --------------- | ----------------------------------------------------------------- |
+| `tx.process`    | —              | NetworkOPs.cpp  | Transaction submission entry point (local or peer-relayed)        |
+| `tx.receive`    | —              | PeerImp.cpp     | Raw transaction received from peer overlay (before deduplication) |
+| `tx.apply`      | `ledger.build` | BuildLedger.cpp | Transaction set applied to new ledger during consensus            |
+| `tx.preflight`  | —              | applySteps.cpp  | Stateless checks stage (`stage=preflight`)                        |
+| `tx.preclaim`   | —              | applySteps.cpp  | Ledger-aware checks stage before fee claim (`stage=preclaim`)     |
+| `tx.transactor` | —              | Transactor.cpp  | Apply stage — the transactor runs (`stage=apply`)                 |
+
+The three apply-pipeline spans share a deterministic `trace_id` derived from
+`txID[0:16]`, so preflight, preclaim, and transactor for one transaction group
+under a single trace even though they run sequentially and often on different
+threads. A transaction that hard-fails preflight or preclaim never reaches the
+later spans — the `stage` attribute identifies where it stopped.

 **Where to find**: Tempo → TraceQL: `{resource.service.name="xrpld" && name=~"tx.process|tx.receive"}`
+or, for the apply pipeline: `{resource.service.name="xrpld" && name=~"tx.preflight|tx.preclaim|tx.transactor"}`

 **Grafana dashboard**: _Transaction Overview_ (`xrpld-transactions`)

@@ -177,13 +187,19 @@ Every span can carry key-value attributes that provide context for filtering and

 #### Transaction Attributes

-| Attribute            | Type    | Set On                     | Description                                          |
-| -------------------- | ------- | -------------------------- | ---------------------------------------------------- |
-| `xrpl.tx.hash`       | string  | `tx.process`, `tx.receive` | Transaction hash (hex-encoded)                       |
-| `xrpl.tx.local`      | boolean | `tx.process`               | `true` if locally submitted, `false` if peer-relayed |
-| `xrpl.tx.path`       | string  | `tx.process`               | Submission path: `"sync"` or `"async"`               |
-| `xrpl.tx.suppressed` | boolean | `tx.receive`               | `true` if transaction was suppressed (duplicate)     |
-| `xrpl.tx.status`     | string  | `tx.receive`               | Transaction status (e.g., `"known_bad"`)             |
+| Attribute           | Type    | Set On                                         | Description                                                           |
+| ------------------- | ------- | ---------------------------------------------- | --------------------------------------------------------------------- |
+| `xrpl.tx.hash`      | string  | `tx.process`, `tx.receive`                     | Transaction hash (hex-encoded)                                        |
+| `local`             | boolean | `tx.process`                                   | `true` if locally submitted, `false` if peer-relayed                  |
+| `path`              | string  | `tx.process`                                   | Submission path: `"sync"` or `"async"`                                |
+| `suppressed`        | boolean | `tx.receive`                                   | `true` if transaction was suppressed (duplicate)                      |
+| `tx_status`         | string  | `tx.receive`                                   | Transaction status (e.g., `"known_bad"`)                              |
+| `xrpl.peer.id`      | int64   | `tx.receive`                                   | Peer identifier (also set on peer spans)                              |
+| `xrpl.peer.version` | string  | `tx.receive`                                   | Peer protocol version string                                          |
+| `stage`             | string  | `tx.preflight`, `tx.preclaim`, `tx.transactor` | Apply-pipeline stage: `preflight`, `preclaim`, or `apply`             |
+| `tx_type`           | string  | `tx.preflight`, `tx.preclaim`, `tx.transactor` | Transaction type name (e.g., `Payment`)                               |
+| `ter_result`        | string  | `tx.preflight`, `tx.preclaim`, `tx.transactor` | Engine result token for that stage (e.g., `tesSUCCESS`, `terPRE_SEQ`) |
+| `applied`           | boolean | `tx.transactor`                                | `true` if the transaction was applied to the ledger                   |

 **Tempo query**: `{span.xrpl.tx.hash="<hash>"}` to trace a specific transaction across nodes.

@@ -248,14 +264,25 @@ The OTel Collector's SpanMetrics connector automatically generates RED (Rate, Er

 **Additional dimension labels** (configured in `otel-collector-config.yaml`):

-| Span Attribute                 | Prometheus Label               | Applies To                |
-| ------------------------------ | ------------------------------ | ------------------------- |
-| `command`                      | `xrpl_rpc_command`             | `rpc.command.*`           |
-| `rpc_status`                   | `xrpl_rpc_status`              | `rpc.command.*`           |
-| `xrpl.consensus.mode`          | `xrpl_consensus_mode`          | `consensus.ledger_close`  |
-| `xrpl.tx.local`                | `xrpl_tx_local`                | `tx.process`              |
-| `xrpl.peer.proposal.trusted`   | `xrpl_peer_proposal_trusted`   | `peer.proposal.receive`   |
-| `xrpl.peer.validation.trusted` | `xrpl_peer_validation_trusted` | `peer.validation.receive` |
+| Span Attribute        | Prometheus Label               | Applies To                                     |
+| --------------------- | ------------------------------ | ---------------------------------------------- |
+| `command`             | `xrpl_rpc_command`             | `rpc.command.*`                                |
+| `rpc_status`          | `xrpl_rpc_status`              | `rpc.command.*`                                |
+| `xrpl.consensus.mode` | `xrpl_consensus_mode`          | `consensus.ledger_close`                       |
+| `local`               | `xrpl_tx_local`                | `tx.process`                                   |
+| `proposal_trusted`    | `xrpl_peer_proposal_trusted`   | `peer.proposal.receive`                        |
+| `validation_trusted`  | `xrpl_peer_validation_trusted` | `peer.validation.receive`                      |
+| `stage`               | `stage`                        | `tx.preflight`, `tx.preclaim`, `tx.transactor` |
+
+The `stage` dimension (3 values: `preflight`, `preclaim`, `apply`) turns the
+apply-pipeline spans into per-stage RED metrics with no native instruments — the
+_Transaction Overview_ dashboard charts rate, p95 latency, and failure rate by stage.
+
+> **Sampling caveat**: span-derived metrics inherit the **tracer head-sampling**
+> ratio (`sampling_ratio` in `[telemetry]`, via `TraceIdRatioBasedSampler`). At
+> `sampling_ratio < 1.0` the stage RED metrics undercount proportionally — they
+> reflect sampled traces, not the full transaction volume. Native StatsD/meter
+> metrics do not sample. Account for this when reading absolute stage rates.

 **Where to query**: Prometheus → `traces_span_metrics_calls_total{span_name="rpc.command.server_info"}`

--- a/OpenTelemetryPlan/Phase3_taskList.md
+++ b/OpenTelemetryPlan/Phase3_taskList.md
@@ -474,17 +474,22 @@ This gives the best of both worlds: guaranteed cross-node correlation via determ

 **Attributes added**:

-| Span            | Attribute        | Type   | Source                                                              |
-| --------------- | ---------------- | ------ | ------------------------------------------------------------------- |
-| `tx.process`    | `tx_type`        | string | `TxFormats::getInstance().findByType(stx->getTxnType())->getName()` |
-| `tx.process`    | `fee`            | int64  | `stx->getFieldAmount(sfFee).xrp().drops()`                          |
-| `tx.process`    | `sequence`       | int64  | `stx->getSeqProxy().value()`                                        |
-| `tx.process`    | `ter_result`     | string | `transToken(e.result)` (set after batch application)                |
-| `tx.process`    | `applied`        | bool   | `e.applied` (set after batch application)                           |
-| `tx.receive`    | `tx_type`        | string | `TxFormats::getInstance().findByType(stx->getTxnType())->getName()` |
-| `txq.enqueue`   | `tx_type`        | string | same pattern as above                                               |
-| `txq.accept.tx` | `txq_status`     | string | `applied` / `failed` / `retried`                                    |
-| `txq.accept`    | `ledger_changed` | bool   | set at end of accept loop                                           |
+| Span              | Attribute            | Type   | Source                                                              |
+| ----------------- | -------------------- | ------ | ------------------------------------------------------------------- |
+| `tx.process`      | `tx_type`            | string | `TxFormats::getInstance().findByType(stx->getTxnType())->getName()` |
+| `tx.process`      | `fee`                | int64  | `stx->getFieldAmount(sfFee).xrp().drops()`                          |
+| `tx.process`      | `sequence`           | int64  | `stx->getSeqProxy().value()`                                        |
+| `tx.process`      | `ter_result`         | string | `transToken(e.result)` (set after batch application)                |
+| `tx.process`      | `applied`            | bool   | `e.applied` (set after batch application)                           |
+| `tx.receive`      | `tx_type`            | string | `TxFormats::getInstance().findByType(stx->getTxnType())->getName()` |
+| `txq.enqueue`     | `tx_type`            | string | same pattern as above                                               |
+| `txq.enqueue`     | `txq_status`         | string | `queued` / `applied_direct` / `applied` / `rejected`                |
+| `txq.enqueue`     | `fee_level_paid`     | int64  | `getFeeLevelPaid(view, *tx).value()`                                |
+| `txq.enqueue`     | `required_fee_level` | int64  | `getRequiredFeeLevel(...).value()`                                  |
+| `txq.batch_clear` | `num_cleared`        | int64  | queued txs cleared ahead of the applying tx                         |
+| `txq.cleanup`     | `expired_count`      | int64  | entries dropped for passed `LastLedgerSequence`                     |
+| `txq.accept.tx`   | `txq_status`         | string | `applied` / `failed` / `retried`                                    |
+| `txq.accept`      | `ledger_changed`     | bool   | set at end of accept loop                                           |

 **New attr keys**: `TxSpanNames.h` (`txType`, `fee`, `sequence`, `terResult`, `applied`), `TxQSpanNames.h` (`txType`).

--- a/docker/telemetry/grafana/dashboards/transaction-overview.json
+++ b/docker/telemetry/grafana/dashboards/transaction-overview.json
@@ -669,6 +669,138 @@
        },
        "overrides": []
      }
+    },
+    {
+      "title": "Tx Apply Pipeline Rate by Stage",
+      "description": "Span rate for each apply-pipeline stage (preflight, preclaim, apply). A drop between stages shows where transactions are filtered out. Requires the stage dimension in spanmetrics.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 64
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (stage, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=~\"tx.preflight|tx.preclaim|tx.transactor\", stage=~\"$stage\"}[5m]))",
+          "legendFormat": "{{stage}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "axisLabel": "Spans / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Tx Apply Pipeline Latency by Stage (p95)",
+      "description": "95th-percentile duration of each apply-pipeline stage. Isolates which stage (preflight, preclaim, apply) dominates transaction processing time.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 64
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "histogram_quantile(0.95, sum by (le, stage, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=~\"tx.preflight|tx.preclaim|tx.transactor\", stage=~\"$stage\"}[5m])))",
+          "legendFormat": "P95 {{stage}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ms",
+          "custom": {
+            "axisLabel": "Duration (ms)",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
+    },
+    {
+      "title": "Tx Apply Pipeline Failure Rate by Stage",
+      "description": "Rate of apply-pipeline spans whose ter_result is not tesSUCCESS, split by stage. Shows whether failures concentrate in preflight, preclaim, or apply. Filters on ter_result rather than span status because a failing ter code completes the span normally; only thrown exceptions set an error status.",
+      "type": "timeseries",
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 72
+      },
+      "options": {
+        "tooltip": {
+          "mode": "multi",
+          "sort": "desc"
+        },
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "calcs": ["mean", "max"]
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus"
+          },
+          "expr": "sum by (stage, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", span_name=~\"tx.preflight|tx.preclaim|tx.transactor\", stage=~\"$stage\", ter_result!~\"tesSUCCESS|\"}[5m]))",
+          "legendFormat": "{{stage}} [{{exported_instance}}]"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "ops",
+          "custom": {
+            "axisLabel": "Failed Spans / Sec",
+            "spanNulls": true,
+            "insertNulls": false,
+            "showPoints": "auto",
+            "pointSize": 3
+          }
+        },
+        "overrides": []
+      }
    }
  ],
  "schemaVersion": 39,
@@ -768,6 +900,24 @@
        },
        "sort": 1,
        "label": "Queue Status"
+      },
+      {
+        "name": "stage",
+        "type": "query",
+        "datasource": {
+          "type": "prometheus"
+        },
+        "query": "label_values(traces_span_metrics_calls_total{span_name=~\"tx.preflight|tx.preclaim|tx.transactor\", stage!=\"\"}, stage)",
+        "refresh": 2,
+        "includeAll": true,
+        "multi": true,
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": "$__all"
+        },
+        "sort": 1,
+        "label": "Apply Stage"
      }
    ]
  },
--- a/docker/telemetry/otel-collector-config.yaml
+++ b/docker/telemetry/otel-collector-config.yaml
@@ -41,6 +41,9 @@ connectors:
      - name: validation_trusted
      - name: tx_type
      - name: ter_result
+      # Apply-pipeline stage (preflight|preclaim|apply) — splits the
+      # tx.preflight/tx.preclaim/tx.transactor span RED metrics per stage.
+      - name: stage
      - name: txq_status
      - name: consensus_state
      - name: load_type
--- a/docs/telemetry-runbook.md
+++ b/docs/telemetry-runbook.md
@@ -74,11 +74,20 @@ All spans instrumented in xrpld, grouped by subsystem:

 ### Transaction Spans (Phase 3)

-| Span Name    | Source File     | Attributes                                                                        | Description                           |
-| ------------ | --------------- | --------------------------------------------------------------------------------- | ------------------------------------- |
-| `tx.process` | NetworkOPs.cpp  | `tx_hash`, `local`, `path`, `tx_type`, `fee`, `sequence`, `ter_result`, `applied` | Transaction submission and processing |
-| `tx.receive` | PeerImp.cpp     | `peer_id`, `tx_hash`, `tx_type`, `peer_version`, `suppressed`, `tx_status`        | Transaction received from peer relay  |
-| `tx.apply`   | BuildLedger.cpp | `ledger_seq`, `tx_count`, `tx_failed`                                             | Transaction set applied per ledger    |
+| Span Name       | Source File     | Attributes                                                                        | Description                           |
+| --------------- | --------------- | --------------------------------------------------------------------------------- | ------------------------------------- |
+| `tx.process`    | NetworkOPs.cpp  | `tx_hash`, `local`, `path`, `tx_type`, `fee`, `sequence`, `ter_result`, `applied` | Transaction submission and processing |
+| `tx.receive`    | PeerImp.cpp     | `peer_id`, `tx_hash`, `tx_type`, `peer_version`, `suppressed`, `tx_status`        | Transaction received from peer relay  |
+| `tx.apply`      | BuildLedger.cpp | `ledger_seq`, `tx_count`, `tx_failed`                                             | Transaction set applied per ledger    |
+| `tx.preflight`  | applySteps.cpp  | `stage`, `tx_type`, `ter_result`                                                  | Stateless checks stage                |
+| `tx.preclaim`   | applySteps.cpp  | `stage`, `tx_type`, `ter_result`                                                  | Ledger-aware checks stage             |
+| `tx.transactor` | Transactor.cpp  | `stage`, `tx_type`, `ter_result`, `applied`                                       | Apply stage (transactor runs)         |
+
+The three apply-pipeline spans (`tx.preflight`, `tx.preclaim`, `tx.transactor`)
+share a deterministic `trace_id` from `txID[0:16]`, so they group under one
+trace per transaction. The `stage` attribute (`preflight` / `preclaim` /
+`apply`) drives the collector spanmetrics `stage` dimension, giving per-stage
+RED metrics on the _Transaction Overview_ dashboard.

 ### Transaction Queue Spans (Phase 3)

@@ -182,6 +191,43 @@ This section shows what questions you can answer using the span attributes, with
 {name=~"tx\\..*"} | tx_type = "NFTokenMint"
 ```

+### Apply Pipeline by Stage
+
+```
+# All three stages of one transaction (preflight -> preclaim -> apply)
+{name=~"tx.preflight|tx.preclaim|tx.transactor"}
+
+# Transactions that failed at the preclaim stage
+{name="tx.preclaim"} | ter_result != "tesSUCCESS"
+
+# Transactions that hard-failed preflight (never reached preclaim/apply)
+{name="tx.preflight"} | ter_result != "tesSUCCESS"
+```
+
+PromQL on the span-derived metrics (dashboard: _Transaction Overview_):
+
+```
+# Per-stage throughput — the funnel preflight >= preclaim >= apply
+sum by (stage) (rate(traces_span_metrics_calls_total{span_name=~"tx.preflight|tx.preclaim|tx.transactor"}[5m]))
+
+# Per-stage p95 latency
+histogram_quantile(0.95, sum by (le, stage) (rate(traces_span_metrics_duration_milliseconds_bucket{span_name=~"tx.preflight|tx.preclaim|tx.transactor"}[5m])))
+
+# Per-stage failure rate (ter_result != tesSUCCESS; a failing ter completes the
+# span normally, so filter on the attribute, not status_code which only flags exceptions)
+sum by (stage) (rate(traces_span_metrics_calls_total{span_name=~"tx.preflight|tx.preclaim|tx.transactor", ter_result!~"tesSUCCESS|"}[5m]))
+```
+
+> **Alerting**: a rising `tx.preflight` / `tx.preclaim` failure rate points to
+> malformed or stale-sequence submissions (often spam or a misbehaving client);
+> a rising `tx.transactor` failure rate points to apply-time problems. Alert per
+> stage rather than on a single aggregate so the failing stage is obvious.
+
+> **Sampling caveat**: these stage metrics are span-derived and inherit the
+> **tracer head-sampling** ratio (`sampling_ratio`). At `sampling_ratio < 1.0`
+> they undercount proportionally — treat them as relative trends, not absolute
+> transaction counts. Native StatsD metrics are unsampled.
+
 ### Transaction Queue Health

 ```
--- a/include/xrpl/tx/detail/TxApplySpanNames.h
+++ b/include/xrpl/tx/detail/TxApplySpanNames.h
@@ -0,0 +1,109 @@
+#pragma once
+
+/** Compile-time span name constants for the transaction apply pipeline.
+ *
+ *  Defines the span names and attribute keys used by the three apply-pipeline
+ *  stages — preflight, preclaim, and transactor (apply) — that run inside the
+ *  library (`src/libxrpl/tx/`). Built on the StaticStr/join() primitives from
+ *  <xrpl/telemetry/SpanNames.h>.
+ *
+ *  Why a separate header from TxSpanNames.h:
+ *  TxSpanNames.h lives under src/xrpld/ (daemon) and serves the overlay/app
+ *  lifecycle spans (tx.receive, tx.process). Library code (applySteps.cpp,
+ *  Transactor.cpp) must not depend on daemon headers, so the apply-pipeline
+ *  constants live here instead. The attribute strings ("tx_type",
+ *  "ter_result", "applied") intentionally match TxSpanNames.h so the collector
+ *  spanmetrics connector aggregates both sets under the same dimensions.
+ *
+ *  Span hierarchy (deterministic trace_id derived from txID[0:16]):
+ *
+ *  The three stages run sequentially and often on different threads, so they
+ *  do not auto-parent. Each uses a hash-derived trace_id keyed on the same
+ *  transaction id, placing all three under one trace without context
+ *  propagation. A transaction that hard-fails preflight or preclaim never
+ *  reaches the transactor span — the stage attribute identifies where it
+ *  stopped.
+ *
+ *    +-----------------------------------------------------------+
+ *    | trace_id = txID[0:16]                                     |
+ *    |                                                           |
+ *    |  +-------------------+   +------------------+   +-------+  |
+ *    |  | tx.preflight      |   | tx.preclaim      |   | tx.   |  |
+ *    |  | stage=preflight   |-->| stage=preclaim   |-->| trans |  |
+ *    |  | tx_type           |   | tx_type          |   | actor |  |
+ *    |  | ter_result        |   | ter_result       |   | stage=|  |
+ *    |  +-------------------+   +------------------+   | apply |  |
+ *    |   stateless checks       ledger-aware checks   +-------+  |
+ *    |   (signature, fields)    (sequence, fee)        applies   |
+ *    +-----------------------------------------------------------+
+ *
+ *  Usage:
+ *  @code
+ *      #include <xrpl/tx/detail/TxApplySpanNames.h>
+ *      using namespace telemetry;
+ *
+ *      // preflight() / preclaim() use hashSpan with a full span name:
+ *      auto span = SpanGuard::hashSpan(
+ *          TraceCategory::Transactions, tx_apply_span::preflight,
+ *          txID.data(), txID.kBytes);
+ *      span.setAttribute(tx_apply_span::attr::stage, tx_apply_span::val::preflight);
+ *      span.setAttribute(tx_apply_span::attr::terResult, transToken(ter).c_str());
+ *  @endcode
+ *
+ *  @code
+ *      // Transactor::operator() uses span() with prefix + suffix:
+ *      auto span = SpanGuard::span(
+ *          TraceCategory::Transactions, seg::tx, tx_apply_span::op::transactor);
+ *      span.setAttribute(tx_apply_span::attr::stage, tx_apply_span::val::apply);
+ *  @endcode
+ */
+
+#include <xrpl/telemetry/SpanNames.h>
+
+namespace xrpl::telemetry::tx_apply_span {
+
+// ===== Span operation suffixes =============================================
+
+namespace op {
+/// "preflight" — stateless transaction checks (suffix form).
+inline constexpr auto preflight = makeStr("preflight");
+/// "preclaim" — ledger-aware checks before fee claim (suffix form).
+inline constexpr auto preclaim = makeStr("preclaim");
+/// "transactor" — the apply stage (suffix form, used with span()).
+inline constexpr auto transactor = makeStr("transactor");
+}  // namespace op
+
+// ===== Full span names (tx.<op>) ===========================================
+
+/// "tx.preflight" — full name for hashSpan() at the preflight stage.
+inline constexpr auto preflight = join(seg::tx, op::preflight);
+/// "tx.preclaim" — full name for hashSpan() at the preclaim stage.
+inline constexpr auto preclaim = join(seg::tx, op::preclaim);
+
+// ===== Attribute keys ======================================================
+
+namespace attr {
+/// "stage" — which apply-pipeline stage this span represents. Drives the
+/// collector spanmetrics `stage` dimension for per-stage RED metrics.
+inline constexpr auto stage = makeStr("stage");
+/// "tx_type" — transaction type name (e.g., "Payment", "OfferCreate").
+/// Matches tx_span::attr::txType so both share the spanmetrics dimension.
+inline constexpr auto txType = makeStr("tx_type");
+/// "ter_result" — engine result code after the stage (e.g., "tesSUCCESS").
+inline constexpr auto terResult = makeStr("ter_result");
+/// "applied" — whether the transaction was applied to the ledger (apply only).
+inline constexpr auto applied = makeStr("applied");
+}  // namespace attr
+
+// ===== Attribute values (stage names) ======================================
+
+namespace val {
+/// "preflight" — value of the stage attribute on tx.preflight.
+inline constexpr auto preflight = makeStr("preflight");
+/// "preclaim" — value of the stage attribute on tx.preclaim.
+inline constexpr auto preclaim = makeStr("preclaim");
+/// "apply" — value of the stage attribute on tx.transactor.
+inline constexpr auto apply = makeStr("apply");
+}  // namespace val
+
+}  // namespace xrpl::telemetry::tx_apply_span
--- a/src/libxrpl/tx/Transactor.cpp
+++ b/src/libxrpl/tx/Transactor.cpp
@@ -44,6 +44,7 @@
 #include <xrpl/tx/SignerEntries.h>
 #include <xrpl/tx/apply.h>
 #include <xrpl/tx/applySteps.h>
+#include <xrpl/tx/detail/TxApplySpanNames.h>

 #include <cstddef>
 #include <cstdint>
@@ -1199,9 +1200,11 @@ Transactor::operator()()
    auto span = telemetry::SpanGuard::span(
        telemetry::TraceCategory::Transactions,
        telemetry::seg::tx,
-        telemetry::makeStr("transactor"));
+        telemetry::tx_apply_span::op::transactor);
+    // "apply" — the third apply-pipeline stage, after preflight and preclaim.
+    span.setAttribute(telemetry::tx_apply_span::attr::stage, telemetry::tx_apply_span::val::apply);
    if (auto const* fmt = TxFormats::getInstance().findByType(ctx_.tx.getTxnType()))
-        span.setAttribute("tx_type", fmt->getName().c_str());
+        span.setAttribute(telemetry::tx_apply_span::attr::txType, fmt->getName().c_str());

    JLOG(j_.trace()) << "apply: " << ctx_.tx.getTransactionID();

@@ -1429,8 +1432,8 @@ Transactor::operator()()

    JLOG(j_.trace()) << (applied ? "applied " : "not applied ") << transToken(result);

-    span.setAttribute("ter_result", transToken(result).c_str());
-    span.setAttribute("applied", applied);
+    span.setAttribute(telemetry::tx_apply_span::attr::terResult, transToken(result).c_str());
+    span.setAttribute(telemetry::tx_apply_span::attr::applied, applied);

    return {result, applied, metadata};
 }
--- a/src/libxrpl/tx/applySteps.cpp
+++ b/src/libxrpl/tx/applySteps.cpp
@@ -13,13 +13,16 @@
 #include <xrpl/protocol/SeqProxy.h>
 #include <xrpl/protocol/TER.h>
 #include <xrpl/protocol/XRPAmount.h>
+#include <xrpl/telemetry/SpanGuard.h>
 #include <xrpl/tx/ApplyContext.h>
 #include <xrpl/tx/Transactor.h>
+#include <xrpl/tx/detail/TxApplySpanNames.h>

 #include <cstdint>
 #include <exception>
 #include <memory>
 #include <optional>
+#include <string_view>
 #include <utility>
 #pragma push_macro("TRANSACTION")
 #undef TRANSACTION
@@ -51,6 +54,47 @@ struct UnknownTxnType : std::exception
    }
 };

+/** Look up the human-readable transaction type name for span attributes.
+ *  Returns nullptr if the type is unknown so the caller can skip the
+ *  attribute rather than emit an empty value.
+ */
+char const*
+txTypeName(TxType txnType)
+{
+    if (auto const* fmt = TxFormats::getInstance().findByType(txnType))
+        return fmt->getName().c_str();
+    return nullptr;
+}
+
+/** Create a deterministic-trace span for an apply-pipeline stage.
+ *
+ *  The trace_id is derived from txID[0:16] so the preflight, preclaim, and
+ *  transactor spans of one transaction share a trace even though they run
+ *  sequentially and often on different threads. Sets the stage, tx_type, and
+ *  (after the stage runs) ter_result attributes that drive the collector
+ *  spanmetrics dimensions. A no-op when telemetry is disabled.
+ *
+ *  @param name   Full span name (tx_apply_span::preflight / ::preclaim).
+ *  @param stage  Stage attribute value (tx_apply_span::val::*).
+ *  @param tx     The transaction supplying the id and type.
+ */
+[[nodiscard]] telemetry::SpanGuard
+makeStageSpan(std::string_view name, std::string_view stage, STTx const& tx)
+{
+    auto const txID = tx.getTransactionID();
+    auto span = telemetry::SpanGuard::hashSpan(
+        telemetry::TraceCategory::Transactions, name, txID.data(), txID.kBytes);
+    // Guard the type lookup behind the active check: preflight runs for every
+    // transaction, so findByType() must not run when tracing is off/disabled.
+    if (span)
+    {
+        span.setAttribute(telemetry::tx_apply_span::attr::stage, stage);
+        if (char const* typeName = txTypeName(tx.getTxnType()))
+            span.setAttribute(telemetry::tx_apply_span::attr::txType, typeName);
+    }
+    return span;
+}
+
 // Call a lambda with the concrete transaction type as a template parameter
 // throw an "UnknownTxnType" exception on error
 template <class F>
@@ -133,82 +177,122 @@ consequencesHelper(PreflightContext const& ctx)
 static std::pair<NotTEC, TxConsequences>
 invokePreflight(PreflightContext const& ctx)
 {
+    // Trace the preflight stage. The span shares the transaction's
+    // deterministic trace_id so it correlates with preclaim and transactor.
+    auto span = makeStageSpan(
+        telemetry::tx_apply_span::preflight, telemetry::tx_apply_span::val::preflight, ctx.tx);
    try
    {
-        return withTxnType(ctx.rules, ctx.tx.getTxnType(), [&]<typename T>() {
+        auto result = withTxnType(ctx.rules, ctx.tx.getTxnType(), [&]<typename T>() {
            auto const tec = Transactor::invokePreflight<T>(ctx);
            return std::make_pair(
                tec, isTesSuccess(tec) ? consequencesHelper<T>(ctx) : TxConsequences{tec});
        });
+        if (span)
+        {
+            span.setAttribute(
+                telemetry::tx_apply_span::attr::terResult, transToken(result.first).c_str());
+        }
+        return result;
    }
    catch (UnknownTxnType const& e)
    {
        // Should never happen
        // LCOV_EXCL_START
        JLOG(ctx.j.fatal()) << "Unknown transaction type in preflight: " << e.txnType;
+        span.recordException(e);
        UNREACHABLE("xrpl::invokePreflight : unknown transaction type");
        return {temUNKNOWN, TxConsequences{temUNKNOWN}};
        // LCOV_EXCL_STOP
    }
+    catch (std::exception const& e)
+    {
+        // The caller's preflight() maps this to tefEXCEPTION. Record it on the
+        // span before unwinding so per-stage error counts include exceptions.
+        span.setAttribute(
+            telemetry::tx_apply_span::attr::terResult, transToken(tefEXCEPTION).c_str());
+        span.recordException(e);
+        throw;
+    }
 }

 static TER
 invokePreclaim(PreclaimContext const& ctx)
 {
+    // Trace the preclaim stage under the transaction's deterministic trace_id.
+    auto span = makeStageSpan(
+        telemetry::tx_apply_span::preclaim, telemetry::tx_apply_span::val::preclaim, ctx.tx);
    try
    {
        // use name hiding to accomplish compile-time polymorphism of static
        // class functions for Transactor and derived classes.
-        return withTxnType(ctx.view.rules(), ctx.tx.getTxnType(), [&]<typename T>() -> TER {
-            // preclaim functionality is divided into two sections:
-            // 1. Up to and including the signature check: returns NotTEC.
-            //    All transaction checks before and including checkSign
-            //    MUST return NotTEC, or something more restrictive.
-            //    Allowing tec results in these steps risks theft or
-            //    destruction of funds, as a fee will be charged before the
-            //    signature is checked.
-            // 2. After the signature check: returns TER.
+        TER const preclaimTer =
+            withTxnType(ctx.view.rules(), ctx.tx.getTxnType(), [&]<typename T>() -> TER {
+                // preclaim functionality is divided into two sections:
+                // 1. Up to and including the signature check: returns NotTEC.
+                //    All transaction checks before and including checkSign
+                //    MUST return NotTEC, or something more restrictive.
+                //    Allowing tec results in these steps risks theft or
+                //    destruction of funds, as a fee will be charged before the
+                //    signature is checked.
+                // 2. After the signature check: returns TER.

-            // If the transactor requires a valid account and the
-            // transaction doesn't list one, preflight will have already
-            // a flagged a failure.
-            auto const id = ctx.tx.getAccountID(sfAccount);
+                // If the transactor requires a valid account and the
+                // transaction doesn't list one, preflight will have already
+                // a flagged a failure.
+                auto const id = ctx.tx.getAccountID(sfAccount);

-            if (id != beast::kZero)
-            {
-                if (NotTEC const preSigResult = [&]() -> NotTEC {
-                        if (NotTEC const result = T::checkSeqProxy(ctx.view, ctx.tx, ctx.j))
-                            return result;
+                if (id != beast::kZero)
+                {
+                    if (NotTEC const preSigResult = [&]() -> NotTEC {
+                            if (NotTEC const result = T::checkSeqProxy(ctx.view, ctx.tx, ctx.j))
+                                return result;

-                        if (NotTEC const result = T::checkPriorTxAndLastLedger(ctx))
-                            return result;
+                            if (NotTEC const result = T::checkPriorTxAndLastLedger(ctx))
+                                return result;

-                        if (NotTEC const result = T::checkPermission(ctx.view, ctx.tx))
-                            return result;
+                            if (NotTEC const result = T::checkPermission(ctx.view, ctx.tx))
+                                return result;

-                        if (NotTEC const result = T::checkSign(ctx))
-                            return result;
+                            if (NotTEC const result = T::checkSign(ctx))
+                                return result;

-                        return tesSUCCESS;
-                    }())
-                    return preSigResult;
+                            return tesSUCCESS;
+                        }())
+                        return preSigResult;

-                if (TER const result = T::checkFee(ctx, calculateBaseFee(ctx.view, ctx.tx)))
-                    return result;
-            }
+                    if (TER const result = T::checkFee(ctx, calculateBaseFee(ctx.view, ctx.tx)))
+                        return result;
+                }

-            return T::preclaim(ctx);
-        });
+                return T::preclaim(ctx);
+            });
+        if (span)
+        {
+            span.setAttribute(
+                telemetry::tx_apply_span::attr::terResult, transToken(preclaimTer).c_str());
+        }
+        return preclaimTer;
    }
    catch (UnknownTxnType const& e)
    {
        // Should never happen
        // LCOV_EXCL_START
        JLOG(ctx.j.fatal()) << "Unknown transaction type in preclaim: " << e.txnType;
+        span.recordException(e);
        UNREACHABLE("xrpl::invokePreclaim : unknown transaction type");
        return temUNKNOWN;
        // LCOV_EXCL_STOP
    }
+    catch (std::exception const& e)
+    {
+        // The caller's preclaim() maps this to tefEXCEPTION. Record it on the
+        // span before unwinding so per-stage error counts include exceptions.
+        span.setAttribute(
+            telemetry::tx_apply_span::attr::terResult, transToken(tefEXCEPTION).c_str());
+        span.recordException(e);
+        throw;
+    }
 }

 /**
--- a/src/tests/libxrpl/telemetry/TxApplySpanNames.cpp
+++ b/src/tests/libxrpl/telemetry/TxApplySpanNames.cpp
@@ -0,0 +1,52 @@
+#include <xrpl/tx/detail/TxApplySpanNames.h>
+
+#include <gtest/gtest.h>
+
+#include <string_view>
+
+/** Contract tests for the transaction apply-pipeline span constants.
+ *
+ *  The span names and attribute keys in TxApplySpanNames.h are a cross-component
+ *  contract: the collector spanmetrics connector aggregates on these exact
+ *  strings (dimensions tx_type, ter_result, stage) and the Grafana
+ *  transaction-overview dashboard queries them. A silent rename here would
+ *  break per-stage metrics with no compile error, so these tests pin the
+ *  literal values. They need no telemetry runtime and run in every build.
+ */
+
+using namespace xrpl::telemetry;
+
+TEST(TxApplySpanNames, span_names_are_dot_qualified)
+{
+    // Full span names feed SpanGuard::hashSpan() in applySteps.cpp.
+    EXPECT_EQ(std::string_view(tx_apply_span::preflight), "tx.preflight");
+    EXPECT_EQ(std::string_view(tx_apply_span::preclaim), "tx.preclaim");
+}
+
+TEST(TxApplySpanNames, operation_suffixes)
+{
+    // Suffix used with SpanGuard::span(cat, seg::tx, suffix) in Transactor.cpp.
+    EXPECT_EQ(std::string_view(tx_apply_span::op::preflight), "preflight");
+    EXPECT_EQ(std::string_view(tx_apply_span::op::preclaim), "preclaim");
+    EXPECT_EQ(std::string_view(tx_apply_span::op::transactor), "transactor");
+}
+
+TEST(TxApplySpanNames, attribute_keys_match_collector_dimensions)
+{
+    // These keys MUST match docker/telemetry/otel-collector-config.yaml
+    // spanmetrics dimensions and TxSpanNames.h (so both span sets aggregate
+    // under one dimension).
+    EXPECT_EQ(std::string_view(tx_apply_span::attr::stage), "stage");
+    EXPECT_EQ(std::string_view(tx_apply_span::attr::txType), "tx_type");
+    EXPECT_EQ(std::string_view(tx_apply_span::attr::terResult), "ter_result");
+    EXPECT_EQ(std::string_view(tx_apply_span::attr::applied), "applied");
+}
+
+TEST(TxApplySpanNames, stage_values_are_the_three_pipeline_stages)
+{
+    // The stage attribute carries exactly these three values; they become the
+    // spanmetrics `stage` dimension cardinality (3) and the dashboard filter.
+    EXPECT_EQ(std::string_view(tx_apply_span::val::preflight), "preflight");
+    EXPECT_EQ(std::string_view(tx_apply_span::val::preclaim), "preclaim");
+    EXPECT_EQ(std::string_view(tx_apply_span::val::apply), "apply");
+}
--- a/src/xrpld/app/misc/detail/TxQ.cpp
+++ b/src/xrpld/app/misc/detail/TxQ.cpp
@@ -607,7 +607,8 @@ TxQ::tryClearAccountQueueUpThruTx(
    if (txResult.applied)
    {
        // All of the queued transactions applied, so remove them from the
-        // queue.
+        // queue.  `dist` queued txs preceded the current one in the batch.
+        span.setAttribute(txq_span::attr::numCleared, static_cast<std::int64_t>(dist));
        endTxIter = erase(accountIter->second, beginTxIter, endTxIter);
        // If `tx` is replacing a queued tx, delete that one, too.
        if (endTxIter != accountIter->second.transactions.end() && endTxIter->first == tSeqProx)
@@ -744,6 +745,9 @@ TxQ::apply(
    span.setAttribute(txq_span::attr::txHash, to_string(tx->getTransactionID()).c_str());
    if (auto const* fmt = TxFormats::getInstance().findByType(tx->getTxnType()))
        span.setAttribute(txq_span::attr::txType, fmt->getName().c_str());
+    // Default outcome; overridden below on the direct-apply and queued paths.
+    // Every other early return leaves the tx rejected from the queue.
+    span.setAttribute(txq_span::attr::txqStatus, txq_span::val::rejected);

    NumberSO const stNumberSO{view.rules().enabled(fixUniversalNumber)};

@@ -757,7 +761,10 @@ TxQ::apply(
    // See if the transaction paid a high enough fee that it can go straight
    // into the ledger.
    if (auto directApplied = tryDirectApply(app, view, tx, flags, j))
+    {
+        span.setAttribute(txq_span::attr::txqStatus, txq_span::val::appliedDirect);
        return *directApplied;
+    }

    if ((flags & TapDryRun) != 0u)
        return {telCAN_NOT_QUEUE, false};
@@ -884,6 +891,10 @@ TxQ::apply(
    auto const metricsSnapshot = feeMetrics_.getSnapshot();
    auto const feeLevelPaid = getFeeLevelPaid(view, *tx);
    auto const requiredFeeLevel = getRequiredFeeLevel(view, flags, metricsSnapshot, lock);
+    span.setAttribute(
+        txq_span::attr::feeLevelPaid, static_cast<std::int64_t>(feeLevelPaid.value()));
+    span.setAttribute(
+        txq_span::attr::requiredFeeLevel, static_cast<std::int64_t>(requiredFeeLevel.value()));

    // Is there a blocker already in the account's queue?  If so, don't
    // allow additional transactions in the queue.
@@ -1217,6 +1228,7 @@ TxQ::apply(
            /* Can't erase (*replacedTxIter) here because success
                implies that it has already been deleted.
            */
+            span.setAttribute(txq_span::attr::txqStatus, txq_span::val::applied);
            return result;
        }
    }
@@ -1332,6 +1344,7 @@ TxQ::apply(
                     << " to queue."
                     << " Flags: " << flags;

+    span.setAttribute(txq_span::attr::txqStatus, txq_span::val::queued);
    return {terQUEUED, false};
 }

@@ -1366,18 +1379,21 @@ TxQ::processClosedLedger(Application& app, ReadView const& view, bool timeLeap)
        maxSize_ = std::max(snapshot.txnsExpected * setup_.ledgersInQueue, setup_.queueSizeMin);

    // Remove any queued candidates whose LastLedgerSequence has gone by.
+    std::int64_t expiredCount = 0;
    for (auto candidateIter = byFee_.begin(); candidateIter != byFee_.end();)
    {
        if (candidateIter->lastValid && *candidateIter->lastValid <= ledgerSeq)
        {
            byAccount_.at(candidateIter->account).dropPenalty = true;
            candidateIter = erase(candidateIter);
+            ++expiredCount;
        }
        else
        {
            ++candidateIter;
        }
    }
+    span.setAttribute(txq_span::attr::expiredCount, expiredCount);

    // Remove any TxQAccounts that don't have candidates
    // under them
--- a/src/xrpld/app/misc/detail/TxQSpanNames.h
+++ b/src/xrpld/app/misc/detail/TxQSpanNames.h
@@ -15,12 +15,14 @@
 *    |  +--------------------------------------------------+ |
 *    |  | txq.enqueue                                      | |
 *    |  | TxQ::apply()                                     | |
- *    |  | attrs: tx_hash, status, fee_level                | |
+ *    |  | attrs: tx_hash, tx_type, txq_status,             | |
+ *    |  |        fee_level_paid, required_fee_level         | |
 *    |  |                                                  | |
 *    |  |  +-------------------+ +----------------------+  | |
 *    |  |  | txq.apply_direct  | | txq.batch_clear      |  | |
 *    |  |  | tryDirectApply()  | | tryClearAccount...() |  | |
- *    |  |  +-------------------+ +----------------------+  | |
+ *    |  |  +-------------------+ | attrs: num_cleared   |  | |
+ *    |  |                        +----------------------+  | |
 *    |  +--------------------------------------------------+ |
 *    +-------------------------------------------------------+
 *