diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt index 51aa0f2b6e..d543d22c93 100644 --- a/.github/scripts/levelization/results/ordering.txt +++ b/.github/scripts/levelization/results/ordering.txt @@ -93,6 +93,7 @@ test.csf > xrpl.basics test.csf > xrpld.consensus test.csf > xrpl.json test.csf > xrpl.protocol +test.csf > xrpl.telemetry test.json > test.jtx test.json > xrpl.json test.jtx > xrpl.basics @@ -240,8 +241,10 @@ xrpld.app > xrpl.shamap xrpld.app > xrpl.telemetry xrpld.app > xrpl.tx xrpld.consensus > xrpl.basics +xrpld.consensus > xrpld.telemetry xrpld.consensus > xrpl.json xrpld.consensus > xrpl.protocol +xrpld.consensus > xrpl.telemetry xrpld.core > xrpl.basics xrpld.core > xrpl.core xrpld.core > xrpl.json diff --git a/OpenTelemetryPlan/02-design-decisions.md b/OpenTelemetryPlan/02-design-decisions.md index 8ff6eaa983..4101f74771 100644 --- a/OpenTelemetryPlan/02-design-decisions.md +++ b/OpenTelemetryPlan/02-design-decisions.md @@ -239,6 +239,22 @@ resource::SemanticConventions::SERVICE_INSTANCE_ID = "xrpl.consensus.ledger.seq" = int64 // Ledger sequence "xrpl.consensus.tx_count" = int64 // Transactions in consensus set "xrpl.consensus.duration_ms" = float64 // Round duration + +// Phase 4a: Establish-phase gap fill & cross-node correlation +"xrpl.consensus.round_id" = int64 // Consensus round number +"xrpl.consensus.ledger_id" = string // previousLedger.id() — shared across nodes +"xrpl.consensus.trace_strategy" = string // "deterministic" or "attribute" +"xrpl.consensus.converge_percent" = int64 // Convergence % (0-100+) +"xrpl.consensus.establish_count" = int64 // Number of establish iterations +"xrpl.consensus.disputes_count" = int64 // Active disputed transactions +"xrpl.consensus.proposers_agreed" = int64 // Peers agreeing with our position +"xrpl.consensus.proposers_total" = int64 // Total peer positions +"xrpl.consensus.agree_count" = int64 // Peers that agree (haveConsensus) 
+"xrpl.consensus.disagree_count" = int64 // Peers that disagree +"xrpl.consensus.threshold_percent" = int64 // Current threshold (50/65/70/95) +"xrpl.consensus.result" = string // "yes", "no", "moved_on" +"xrpl.consensus.mode.old" = string // Previous consensus mode +"xrpl.consensus.mode.new" = string // New consensus mode ``` #### RPC Attributes diff --git a/OpenTelemetryPlan/06-implementation-phases.md b/OpenTelemetryPlan/06-implementation-phases.md index ccf1fd54d4..eadd18293f 100644 --- a/OpenTelemetryPlan/06-implementation-phases.md +++ b/OpenTelemetryPlan/06-implementation-phases.md @@ -164,11 +164,22 @@ gantt | 4.10 | Multi-validator integration tests | | 4.11 | Performance validation | +### Spans Produced + +| Span Name | Location | Attributes | +| --------------------------- | ---------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `consensus.proposal.send` | `RCLConsensus.cpp:177` | `xrpl.consensus.round` | +| `consensus.ledger_close` | `RCLConsensus.cpp:282` | `xrpl.consensus.ledger.seq`, `xrpl.consensus.mode` | +| `consensus.accept` | `RCLConsensus.cpp:395` | `xrpl.consensus.proposers`, `xrpl.consensus.round_time_ms` | +| `consensus.accept.apply` | `RCLConsensus.cpp:521` | `xrpl.consensus.close_time`, `close_time_correct`, `close_resolution_ms`, `state`, `proposing`, `round_time_ms`, `ledger.seq`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction` | +| `consensus.validation.send` | `RCLConsensus.cpp:753` | `xrpl.consensus.proposing` | + ### Exit Criteria - [x] Complete consensus round traces - [x] Phase transitions visible - [x] Proposals and validations traced +- [x] Close time agreement tracked (per `avCT_CONSENSUS_PCT`) - [x] No impact on consensus timing - [ ] Multi-validator test network validated @@ -196,6 +207,69 @@ See 
[Phase4_taskList.md](./Phase4_taskList.md) for the full spec and implementat --- +## 6.5a Phase 4a: Establish-Phase Gap Fill & Cross-Node Correlation + +**Objective**: Fill tracing gaps in the establish phase and establish cross-node +correlation using deterministic trace IDs derived from `previousLedger.id()`. + +**Approach**: Direct instrumentation in `Consensus.h`. Long-lived spans use +direct SpanGuard members; short-lived scoped spans use `XRPL_TRACE_*` macros. + +### Tasks + +| Task | Description | Effort | Risk | +| ---- | ------------------------------------------------ | ------ | ------ | +| 4a.0 | Prerequisites: extend SpanGuard & Telemetry APIs | 1d | Medium | +| 4a.1 | Adaptor `getTelemetry()` method | 0.5d | Low | +| 4a.2 | Switchable round span with deterministic traceID | 2d | High | +| 4a.3 | Span members in `Consensus.h` | 0.5d | Medium | +| 4a.4 | Instrument `phaseEstablish()` | 1d | Medium | +| 4a.5 | Instrument `updateOurPositions()` | 1d | Medium | +| 4a.6 | Instrument `haveConsensus()` (thresholds) | 1d | Medium | +| 4a.7 | Instrument mode changes | 0.5d | Low | +| 4a.8 | Reparent existing spans under round | 0.5d | Low | +| 4a.9 | Build verification and testing | 1d | Low | + +**Total Effort**: 9 days + +### Spans Produced + +| Span Name | Location | Key Attributes | +| ---------------------------- | ------------------ | ---------------------------------------------------------------- | +| `consensus.round` | `RCLConsensus.cpp` | `round_id`, `ledger_id`, `ledger.seq`, `mode`; link → prev round | +| `consensus.establish` | `Consensus.h` | `converge_percent`, `establish_count`, `proposers` | +| `consensus.update_positions` | `Consensus.h` | `disputes_count`, `converge_percent`, `proposers_agreed/total` | +| `consensus.check` | `Consensus.h` | `agree/disagree_count`, `threshold_percent`, `result` | +| `consensus.mode_change` | `RCLConsensus.cpp` | `mode.old`, `mode.new` | + +### Exit Criteria + +- [ ] Establish phase internals fully traced 
(disputes, convergence, thresholds) +- [ ] Cross-node correlation works via deterministic trace_id +- [ ] Strategy switchable via config (`deterministic` / `attribute`) +- [ ] Consecutive rounds linked via follows-from spans +- [ ] Build passes with telemetry ON and OFF +- [ ] No impact on consensus timing + +See [Phase4_taskList.md](./Phase4_taskList.md) for full task details. + +--- + +## 6.5b Phase 4b: Cross-Node Propagation (Future) + +**Objective**: Wire `TraceContextPropagator` for P2P messages (proposals, +validations) to enable true distributed tracing between nodes. + +**Status**: Design documented, NOT implemented. Protobuf fields (field 1001) +and `TraceContextPropagator` class exist. Wiring deferred until Phase 4a is +validated in a multi-node environment. + +**Prerequisites**: Phase 4a complete and validated. + +See [Phase4_taskList.md § Phase 4b](./Phase4_taskList.md) for full design. + +--- + ## 6.6 Phase 5: Documentation & Deployment (Week 9) **Objective**: Production readiness diff --git a/OpenTelemetryPlan/Phase4_taskList.md b/OpenTelemetryPlan/Phase4_taskList.md index a5ef457efd..08330865a5 100644 --- a/OpenTelemetryPlan/Phase4_taskList.md +++ b/OpenTelemetryPlan/Phase4_taskList.md @@ -67,7 +67,7 @@ - Create `consensus.ledger_close` span - Set attributes: close_time, mode, transaction count in initial position - - Note: The Consensus template class in `include/xrpl/consensus/Consensus.h` drives phase transitions — check if instrumentation goes there or in the Adaptor + - Note: The Consensus template class in `src/xrpld/consensus/Consensus.h` drives phase transitions — Phase 4a instruments directly in the template **Key modified files**: @@ -213,9 +213,625 @@ **Parallel work**: Tasks 4.2, 4.3, and 4.4 can run in parallel after 4.1 is complete. Task 4.5 depends on all three. Task 4.6 depends on 4.2 and Phase 3. 
+### Implemented Spans + +| Span Name | Method | Key Attributes | +| --------------------------- | ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `consensus.proposal.send` | `Adaptor::propose` | `xrpl.consensus.round` | +| `consensus.ledger_close` | `Adaptor::onClose` | `xrpl.consensus.ledger.seq`, `xrpl.consensus.mode` | +| `consensus.accept` | `Adaptor::onAccept` | `xrpl.consensus.proposers`, `xrpl.consensus.round_time_ms` | +| `consensus.accept.apply` | `Adaptor::doAccept` | `xrpl.consensus.close_time`, `close_time_correct`, `close_resolution_ms`, `state`, `proposing`, `round_time_ms`, `ledger.seq`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction` | +| `consensus.validation.send` | `Adaptor::onAccept` (via validate) | `xrpl.consensus.proposing` | + +#### Close Time Attributes (consensus.accept.apply) + +The `consensus.accept.apply` span captures ledger close time agreement details +driven by `avCT_CONSENSUS_PCT` (75% validator agreement threshold): + +- **`xrpl.consensus.close_time`** — Agreed-upon ledger close time (epoch seconds). When validators disagree (`consensusCloseTime == epoch`), this is synthetically set to `prevCloseTime + 1s`. +- **`xrpl.consensus.close_time_correct`** — `true` if validators reached agreement, `false` if they "agreed to disagree" (close time forced to prev+1s). +- **`xrpl.consensus.close_resolution_ms`** — Rounding granularity for close time (starts at 30s, decreases as ledger interval stabilizes). +- **`xrpl.consensus.state`** — `"finished"` (normal) or `"moved_on"` (consensus failed, adopted best available). +- **`xrpl.consensus.proposing`** — Whether this node was proposing. +- **`xrpl.consensus.round_time_ms`** — Total consensus round duration. 
+- **`xrpl.consensus.parent_close_time`** — Previous ledger's close time (epoch seconds). Enables computing close-time deltas across consecutive rounds without correlating separate spans. +- **`xrpl.consensus.close_time_self`** — This node's own proposed close time before consensus voting. +- **`xrpl.consensus.close_time_vote_bins`** — Number of distinct close-time vote bins from peer proposals. Higher values indicate less agreement among validators. +- **`xrpl.consensus.resolution_direction`** — Whether close-time resolution `"increased"` (coarser), `"decreased"` (finer), or stayed `"unchanged"` relative to the previous ledger. + **Exit Criteria** (from [06-implementation-phases.md §6.11.4](./06-implementation-phases.md)): -- [ ] Complete consensus round traces -- [ ] Phase transitions visible -- [ ] Proposals and validations traced -- [ ] No impact on consensus timing +- [x] Complete consensus round traces +- [x] Phase transitions visible +- [x] Proposals and validations traced +- [x] Close time agreement tracked (per `avCT_CONSENSUS_PCT`) +- [x] No impact on consensus timing + +--- + +# Phase 4a: Establish-Phase Gap Fill & Cross-Node Correlation + +> **Goal**: Fill tracing gaps in the consensus establish phase (disputes, convergence, +> threshold escalation, mode changes) and establish cross-node correlation using a +> deterministic shared trace ID derived from `previousLedger.id()`. +> +> **Approach**: Direct instrumentation in `Consensus.h` — the generic consensus +> template has full access to internal state (`convergePercent_`, `result_->disputes`, +> `mode_`, threshold logic). Telemetry access comes via a single new adaptor +> method `getTelemetry()`. Long-lived spans (round, establish) are stored as +> class members using `SpanGuard` directly — NOT the `XRPL_TRACE_*` convenience +> macros (which create local variables named `_xrpl_guard_`). Short-lived +> scoped spans (update_positions, check) can use the macros. 
All code compiles +> to no-ops when `XRPL_ENABLE_TELEMETRY` is not defined. +> +> **Branch**: `pratik/otel-phase4-consensus-tracing` + +## Design: Switchable Correlation Strategy + +Two strategies for cross-node trace correlation, switchable via config: + +### Strategy A — Deterministic Trace ID (Default) + +Derive `trace_id = SHA256(previousLedger.id())[0:16]` so all nodes in the same +consensus round share the same trace_id without P2P context propagation. + +- **Pros**: All nodes appear in the same trace in Tempo/Jaeger automatically. + No collector-side post-processing needed. +- **Cons**: Overrides OTel's random trace_id generation; requires custom + `IdGenerator` or manual span context construction. + +### Strategy B — Attribute-Based Correlation + +Use normal random trace_id but attach `xrpl.consensus.ledger_id` as an attribute +on every consensus span. Correlation happens at query time via Tempo/Grafana +`by attribute` queries. + +- **Pros**: Standard OTel trace_id semantics; no SDK customization. +- **Cons**: Cross-node correlation requires query-time joins, not automatic. + +### Config + +```ini +[telemetry] +# "deterministic" (default) or "attribute" +consensus_trace_strategy=deterministic +``` + +### Implementation + +In `RCLConsensus::Adaptor::startRound()`: + +- If `deterministic`: + 1. Compute `trace_id_bytes = SHA256(prevLedgerID)[0:16]` + 2. Construct `opentelemetry::trace::TraceId(trace_id_bytes)` + 3. Create a synthetic `SpanContext` with this trace_id and a random span_id: + ```cpp + auto traceId = opentelemetry::trace::TraceId(trace_id_bytes); + auto spanId = opentelemetry::trace::SpanId(random_8_bytes); + auto syntheticCtx = opentelemetry::trace::SpanContext( + traceId, spanId, opentelemetry::trace::TraceFlags(1), false); + ``` + 4. Wrap in `opentelemetry::context::Context` via + `opentelemetry::trace::SetSpan(context, syntheticSpan)` + 5. Call `startSpan("consensus.round", parentContext)` so the new span + inherits the deterministic trace_id. 
+- If `attribute`: start a normal `consensus.round` span, set + `xrpl.consensus.ledger_id = previousLedger.id()` as attribute. + +Both strategies always set `xrpl.consensus.round_id` (round number) and +`xrpl.consensus.ledger_id` (previous ledger hash) as attributes. + +--- + +## Design: Span Hierarchy + +``` +consensus.round (root — created in RCLConsensus::startRound, closed at accept) +│ link → previous round's SpanContext (follows-from) +│ +├── consensus.establish (phaseEstablish → acceptance, in Consensus.h) +│ ├── consensus.update_positions (each updateOurPositions call) +│ │ └── dispute.resolve (per-tx dispute resolution event) +│ ├── consensus.check (each haveConsensus call) +│ └── consensus.mode_change (short-lived span in adaptor on mode transition) +│ +├── consensus.accept (existing onAccept span — reparented under round) +│ +└── consensus.validation.send (existing — reparented, follows-from link to round) +``` + +### Span Links (follows-from relationships) + +| Link Source | Link Target | Rationale | +| ----------------------------------------- | -------------------------- | ------------------------------------------------------------------------------ | +| `consensus.round` (N+1) | `consensus.round` (N) | Causal chain: round N+1 exists because round N accepted | +| `consensus.validation.send` | `consensus.round` | Validation follows from the round that produced it; may outlive the round span | +| _(Phase 4b)_ Received proposal processing | Sender's `consensus.round` | Cross-node causal link via P2P context propagation | + +--- + +## Task 4a.0: Prerequisites — Extend SpanGuard and Telemetry APIs + +**Objective**: Add missing API surface needed by later tasks. + +**What to do**: + +1. **Add `SpanGuard::addEvent()` with attributes** (needed by Task 4a.5): + The current `addEvent(string_view name)` only accepts a name. 
Add an + overload that accepts key-value attributes: + + ```cpp + void addEvent(std::string_view name, + std::initializer_list< + std::pair<opentelemetry::nostd::string_view, opentelemetry::common::AttributeValue>> attributes) + { + span_->AddEvent(std::string(name), attributes); + } + ``` + +2. **Add a `Telemetry::startSpan()` overload that accepts span links** (needed by Tasks 4a.2, 4a.8): + The current `startSpan()` has no span link support. Add an overload that + accepts a vector of `SpanContext` links for follows-from relationships: + + ```cpp + virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> + startSpan( + std::string_view name, + opentelemetry::context::Context const& parentContext, + std::vector<opentelemetry::trace::SpanContext> const& links, + opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0; + ``` + +3. **Add `XRPL_TRACE_ADD_EVENT` macro** (needed by Task 4a.5): + Add to `TracingInstrumentation.h` to expose `addEvent(name, attrs)` through + the macro interface (consistent with `XRPL_TRACE_SET_ATTR` pattern). The body + is wrapped in `do { } while (0)` so the macro behaves as a single statement + and cannot capture a following `else`: + ```cpp + #ifdef XRPL_ENABLE_TELEMETRY + #define XRPL_TRACE_ADD_EVENT(name, ...) \ + do \ + { \ + if (_xrpl_guard_.has_value()) \ + { \ + _xrpl_guard_->addEvent(name, __VA_ARGS__); \ + } \ + } while (0) + #else + #define XRPL_TRACE_ADD_EVENT(name, ...) ((void)0) + #endif + ``` + +**Key modified files**: + +- `include/xrpl/telemetry/SpanGuard.h` — add `addEvent()` overload +- `include/xrpl/telemetry/Telemetry.h` — add `startSpan()` with links +- `src/xrpld/telemetry/Telemetry.cpp` — implement new overload +- `src/xrpld/telemetry/NullTelemetry.cpp` — no-op implementation +- `src/xrpld/telemetry/TracingInstrumentation.h` — add `XRPL_TRACE_ADD_EVENT` macro + +--- + +## Task 4a.1: Adaptor `getTelemetry()` Method + +**Objective**: Give `Consensus.h` access to the telemetry subsystem without +coupling the generic template to OTel headers. + +**What to do**: + +- Add `getTelemetry()` method to the Adaptor concept (returns + `xrpl::telemetry::Telemetry&`). The return type is already forward-declared + behind `#ifdef XRPL_ENABLE_TELEMETRY`. 
+- Implement in `RCLConsensus::Adaptor` — delegates to `app_.getTelemetry()`. +- In `Consensus.h`, the `XRPL_TRACE_*` macros call + `adaptor_.getTelemetry()` — when telemetry is disabled, the macros expand to + `((void)0)` and the method is never called. + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.h` — declare `getTelemetry()` +- `src/xrpld/app/consensus/RCLConsensus.cpp` — implement `getTelemetry()` + +--- + +## Task 4a.2: Switchable Round Span with Deterministic Trace ID + +**Objective**: Create a `consensus.round` root span in `startRound()` that uses +the switchable correlation strategy. Store span context as a member for child +spans in `Consensus.h`. + +**What to do**: + +- In `RCLConsensus::Adaptor::startRound()` (or a new helper): + - Read `consensus_trace_strategy` from config. + - **Deterministic**: compute `trace_id = SHA256(prevLedgerID)[0:16]`. + Construct a `SpanContext` with this trace_id, then start + `consensus.round` span as child of that context. + - **Attribute**: start normal `consensus.round` span. + - Set attributes on both: `xrpl.consensus.round_id`, + `xrpl.consensus.ledger_id`, `xrpl.consensus.ledger.seq`, + `xrpl.consensus.mode`. + - Store the round span in `Consensus` as a member (see Task 4a.3). + - If a previous round's span context is available, add a **span link** + (follows-from) to establish the round chain. + +- Add `createDeterministicTraceId(hash)` utility to + `include/xrpl/telemetry/Telemetry.h` (returns 16-byte trace ID from a + 256-bit hash by truncation). + +- Add `consensus_trace_strategy` to `Telemetry::Setup` and + `TelemetryConfig.cpp` parser: + ```cpp + /** Cross-node correlation strategy: "deterministic" or "attribute". 
*/ + std::string consensusTraceStrategy = "deterministic"; + ``` + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` +- `include/xrpl/telemetry/Telemetry.h` — `createDeterministicTraceId()` +- `src/xrpld/telemetry/TelemetryConfig.cpp` — parse new config option + +--- + +## Task 4a.3: Span Members in `Consensus.h` + +**Objective**: Add span storage to the `Consensus` class so that spans created +in `startRound()` (adaptor) are accessible from `phaseEstablish()`, +`updateOurPositions()`, and `haveConsensus()` (template methods). + +**What to do**: + +- Add to `Consensus` private members (guarded by `#ifdef XRPL_ENABLE_TELEMETRY`): + ```cpp + #ifdef XRPL_ENABLE_TELEMETRY + std::optional<xrpl::telemetry::SpanGuard> roundSpan_; + std::optional<xrpl::telemetry::SpanGuard> establishSpan_; + opentelemetry::context::Context prevRoundContext_; + #endif + ``` +- `roundSpan_` is created in `startRound()` via the adaptor and stored. + Its `SpanGuard::Scope` member keeps the span active on the thread context + for the entire round lifetime. +- `establishSpan_` is created when entering phaseEstablish and cleared on accept. + It becomes a child of `roundSpan_` via OTel's thread-local context propagation. +- `prevRoundContext_` stores the previous round's context for follows-from links. + +**Threading assumption**: `startRound()`, `phaseEstablish()`, `updateOurPositions()`, +and `haveConsensus()` all run on the same thread (the consensus job queue thread). +This is required for the `SpanGuard::Scope`-based parent-child hierarchy to work. +The `Consensus` class documentation confirms it is NOT thread-safe and calls are +serialized by the application. 
+ +- Add conditional include at top of `Consensus.h`: + ```cpp + #ifdef XRPL_ENABLE_TELEMETRY + #include <xrpl/telemetry/SpanGuard.h> + #include <xrpld/telemetry/TracingInstrumentation.h> + #endif + ``` + +**Key modified files**: + +- `src/xrpld/consensus/Consensus.h` + +--- + +## Task 4a.4: Instrument `phaseEstablish()` + +**Objective**: Create `consensus.establish` span wrapping the establish phase, +with attributes for convergence progress. + +**What to do**: + +- At the start of `phaseEstablish()` (line 1298), if `establishSpan_` is not + yet created, create it as child of `roundSpan_` using the **direct API** + (NOT the `XRPL_TRACE_CONSENSUS` macro, which creates a local variable): + + ```cpp + #ifdef XRPL_ENABLE_TELEMETRY + if (!establishSpan_ && adaptor_.getTelemetry().shouldTraceConsensus()) + { + establishSpan_.emplace( + adaptor_.getTelemetry().startSpan("consensus.establish")); + } + #endif + ``` + +- Set attributes on each call: + - `xrpl.consensus.converge_percent` — `convergePercent_` + - `xrpl.consensus.establish_count` — `establishCounter_` + - `xrpl.consensus.proposers` — `currPeerPositions_.size()` + +- On phase exit (transition to accept), close the establish span and record + final duration. + +**Key modified files**: + +- `src/xrpld/consensus/Consensus.h` — `phaseEstablish()` method + +--- + +## Task 4a.5: Instrument `updateOurPositions()` + +**Objective**: Trace each position update cycle including dispute resolution +details. + +**What to do**: + +- At the start of `updateOurPositions()` (line 1418), create a scoped child + span. 
This method is called and returns within a single `phaseEstablish()` + call, so the `XRPL_TRACE_CONSENSUS` macro works here (scoped local): + + ```cpp + XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.update_positions"); + ``` + +- Set attributes: + - `xrpl.consensus.disputes_count` — `result_->disputes.size()` + - `xrpl.consensus.converge_percent` — current convergence + - `xrpl.consensus.proposers_agreed` — count of peers with same position + - `xrpl.consensus.proposers_total` — total peer positions + +- Inside the dispute resolution loop, for each dispute that changes our vote, + add an **event** with attributes using `XRPL_TRACE_ADD_EVENT` (from Task 4a.0): + ```cpp + XRPL_TRACE_ADD_EVENT("dispute.resolve", { + {"xrpl.tx.id", std::string(tx_id)}, + {"xrpl.dispute.our_vote", our_vote}, + {"xrpl.dispute.yays", static_cast<int64_t>(yays)}, + {"xrpl.dispute.nays", static_cast<int64_t>(nays)} + }); + ``` + +**Key modified files**: + +- `src/xrpld/consensus/Consensus.h` — `updateOurPositions()` method + +--- + +## Task 4a.6: Instrument `haveConsensus()` (Threshold & Convergence) + +**Objective**: Trace consensus checking including threshold escalation +(`ConsensusParms::AvalancheState::{init, mid, late, stuck}`). + +**What to do**: + +- At the start of `haveConsensus()` (line 1598), create a scoped child span: + + ```cpp + XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.check"); + ``` + +- Set attributes: + - `xrpl.consensus.agree_count` — peers that agree with our position + - `xrpl.consensus.disagree_count` — peers that disagree + - `xrpl.consensus.converge_percent` — convergence percentage + - `xrpl.consensus.result` — ConsensusState result (Yes/No/MovedOn) + +- The free function `checkConsensus()` in `Consensus.cpp` (line 151) determines + thresholds based on `currentAgreeTime`. Threshold values come from + `ConsensusParms::avalancheCutoffs` (defined in `ConsensusParms.h`). + The escalation states are `ConsensusParms::AvalancheState::{init, mid, late, stuck}`. 
+ Record the effective threshold as an attribute on the span: + - `xrpl.consensus.threshold_percent` — current threshold from `avalancheCutoffs` + +**Key modified files**: + +- `src/xrpld/consensus/Consensus.h` — `haveConsensus()` method + +--- + +## Task 4a.7: Instrument Mode Changes + +**Objective**: Trace consensus mode transitions (proposing ↔ observing, +wrongLedger, switchedLedger). + +**What to do**: + +Mode changes are rare (typically 0-1 per round), so a **standalone short-lived +span** is appropriate (not an event). This captures timing of the mode change +itself. + +- In `RCLConsensus::Adaptor::onModeChange()`, create a scoped span: + + ```cpp + XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.mode_change"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.old", to_string(before).c_str()); + XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.new", to_string(after).c_str()); + ``` + +- Note: `MonitoredMode::set()` (line 304 in `Consensus.h`) calls + `adaptor_.onModeChange(before, after)` — so the span is created in the + adaptor, which already has telemetry access. No instrumentation needed + in `Consensus.h` for this task. + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` — `onModeChange()` + +--- + +## Task 4a.8: Reparent Existing Spans Under Round + +**Objective**: Make existing consensus spans (`consensus.accept`, +`consensus.accept.apply`, `consensus.validation.send`) children of the +`consensus.round` root span instead of being standalone. + +**What to do**: + +- The existing spans in `onAccept()`, `doAccept()`, and `validate()` use + `XRPL_TRACE_CONSENSUS(app_.getTelemetry(), ...)` which creates standalone + spans on the current thread's context. +- After Task 4a.2 creates the round span and stores it, these methods run on + the same thread within the round span's scope, so they automatically become + children. Verify this works correctly. 
+- For `consensus.validation.send`: add a **span link** (follows-from) to the + round span context, since the validation may be processed after the round + completes. + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` — verify parent-child hierarchy + +--- + +## Task 4a.9: Build Verification and Testing + +**Objective**: Verify all Phase 4a changes compile cleanly with telemetry ON +and OFF, and don't affect consensus timing. + +**What to do**: + +1. Build with `telemetry=ON` — verify no compilation errors +2. Build with `telemetry=OFF` — verify macros expand to no-ops, no new includes + leak into `Consensus.h` when disabled +3. Run existing consensus unit tests +4. Verify `#ifdef XRPL_ENABLE_TELEMETRY` guards on all new members in + `Consensus.h` +5. Run `pccl` pre-commit checks + +**Verification Checklist**: + +- [x] Build succeeds with telemetry ON +- [x] Build succeeds with telemetry OFF +- [x] Existing consensus tests pass +- [x] `Consensus.h` has zero OTel includes when telemetry is OFF +- [x] No new virtual calls in hot consensus paths +- [x] `pccl` passes + +--- + +## Phase 4a Summary + +| Task | Description | New Files | Modified Files | Depends On | +| ---- | ------------------------------------------------ | --------- | -------------- | ---------- | +| 4a.0 | Prerequisites: extend SpanGuard & Telemetry APIs | 0 | 5 | Phase 4 | +| 4a.1 | Adaptor `getTelemetry()` method | 0 | 2 | Phase 4 | +| 4a.2 | Switchable round span with deterministic traceID | 0 | 3 | 4a.0, 4a.1 | +| 4a.3 | Span members in `Consensus.h` | 0 | 1 | 4a.1 | +| 4a.4 | Instrument `phaseEstablish()` | 0 | 1 | 4a.3 | +| 4a.5 | Instrument `updateOurPositions()` | 0 | 1 | 4a.0, 4a.3 | +| 4a.6 | Instrument `haveConsensus()` (thresholds) | 0 | 1 | 4a.3 | +| 4a.7 | Instrument mode changes | 0 | 1 | 4a.1 | +| 4a.8 | Reparent existing spans under round | 0 | 1 | 4a.0, 4a.2 | +| 4a.9 | Build verification and testing | 0 | 0 | 4a.0-4a.8 | + +**Parallel work**: Tasks 4a.0 and 
4a.1 can run in parallel. Tasks 4a.4, 4a.5, 4a.6, and 4a.7 can run in parallel after 4a.3 (and 4a.0 for 4a.5). + +### New Spans (Phase 4a) + +| Span Name | Location | Key Attributes | +| ---------------------------- | ------------------ | ---------------------------------------------------------------------------------- | +| `consensus.round` | `RCLConsensus.cpp` | `round_id`, `ledger_id`, `ledger.seq`, `mode`; link → prev round | +| `consensus.establish` | `Consensus.h` | `converge_percent`, `establish_count`, `proposers` | +| `consensus.update_positions` | `Consensus.h` | `disputes_count`, `converge_percent`, `proposers_agreed`, `proposers_total` | +| `consensus.check` | `Consensus.h` | `agree_count`, `disagree_count`, `converge_percent`, `result`, `threshold_percent` | +| `consensus.mode_change` | `RCLConsensus.cpp` | `mode.old`, `mode.new` | + +### New Events (Phase 4a) + +| Event Name | Parent Span | Attributes | +| ----------------- | ---------------------------- | ----------------------------------- | +| `dispute.resolve` | `consensus.update_positions` | `tx_id`, `our_vote`, `yays`, `nays` | + +### New Attributes (Phase 4a) + +```cpp +// Round-level (on consensus.round) +"xrpl.consensus.round_id" = int64 // Consensus round number +"xrpl.consensus.ledger_id" = string // previousLedger.id() hash +"xrpl.consensus.trace_strategy" = string // "deterministic" or "attribute" + +// Establish-level +"xrpl.consensus.converge_percent" = int64 // Convergence % (0-100+) +"xrpl.consensus.establish_count" = int64 // Number of establish iterations +"xrpl.consensus.disputes_count" = int64 // Active disputes +"xrpl.consensus.proposers_agreed" = int64 // Peers agreeing with us +"xrpl.consensus.proposers_total" = int64 // Total peer positions +"xrpl.consensus.agree_count" = int64 // Peers that agree (haveConsensus) +"xrpl.consensus.disagree_count" = int64 // Peers that disagree +"xrpl.consensus.threshold_percent" = int64 // Current threshold (50/65/70/95) 
+"xrpl.consensus.result" = string // "yes", "no", "moved_on" + +// Mode change +"xrpl.consensus.mode.old" = string // Previous mode +"xrpl.consensus.mode.new" = string // New mode +``` + +### Implementation Notes + +- **Separation of concerns**: All non-trivial telemetry code extracted to private + helpers (`startRoundTracing`, `createValidationSpan`, `startEstablishTracing`, + `updateEstablishTracing`, `endEstablishTracing`). Business logic methods contain + only single-line `#ifdef` blocks calling these helpers. +- **Thread safety**: `createValidationSpan()` runs on the jtACCEPT worker thread. + Instead of accessing `roundSpan_` across threads, a `roundSpanContext_` snapshot + (lightweight `SpanContext` value type) is captured on the consensus thread in + `startRoundTracing()` and read by `createValidationSpan()`. The job queue + provides the happens-before guarantee. +- **Macro safety**: `XRPL_TRACE_ADD_EVENT` uses `do { } while (0)` to prevent + dangling-else issues. +- **Config validation**: `consensus_trace_strategy` is validated to be either + `"deterministic"` or `"attribute"`, falling back to `"deterministic"` for + unrecognised values. +- **Plan deviation**: `roundSpan_` is stored in `RCLConsensus::Adaptor` (not + `Consensus.h`) because the adaptor has access to telemetry config and can + implement the deterministic trace ID strategy. `establishSpan_` is correctly + in `Consensus.h` as planned. + +--- + +# Phase 4b: Cross-Node Propagation (Future — Documentation Only) + +> **Goal**: Wire `TraceContextPropagator` for P2P messages so that proposals +> and validations carry trace context between nodes. This enables true +> distributed tracing where a proposal sent by Node A creates a child span +> on Node B. +> +> **Status**: NOT IMPLEMENTED. The protobuf fields and propagator class exist +> but are not wired. This section documents the design for future work. 
+ +## Architecture + +``` +Node A (proposing) Node B (receiving) +───────────────── ────────────────── +consensus.round consensus.round +├── propose() ├── peerProposal() +│ └── TraceContextPropagator │ └── TraceContextPropagator +│ ::injectToProtobuf( │ ::extractFromProtobuf( +│ TMProposeSet.trace_context) │ TMProposeSet.trace_context) +│ │ └── span link → Node A's context +└── validate() └── onValidation() + └── inject into TMValidation └── extract from TMValidation +``` + +## Wiring Points + +| Message | Inject Location | Extract Location | Protobuf Field | +| --------------- | ---------------------------------- | ----------------------------------- | -------------------------- | +| `TMProposeSet` | `Adaptor::propose()` | `PeerImp::onMessage(TMProposeSet)` | field 1001: `TraceContext` | +| `TMValidation` | `Adaptor::validate()` | `PeerImp::onMessage(TMValidation)` | field 1001: `TraceContext` | +| `TMTransaction` | `NetworkOPs::processTransaction()` | `PeerImp::onMessage(TMTransaction)` | field 1001: `TraceContext` | + +## Span Link Semantics + +Received messages use **span links** (follows-from), NOT parent-child: + +- The receiver's processing span links to the sender's context +- This preserves each node's independent trace tree +- Cross-node correlation visible via linked traces in Tempo/Jaeger + +## Interaction with Deterministic Trace ID (Strategy A) + +When using deterministic trace_id (Phase 4a default), cross-node spans already +share the same trace_id. P2P propagation adds **span-level** linking: + +- Without propagation: spans from different nodes appear in the same trace + (same trace_id) but without parent-child or follows-from relationships. +- With propagation: spans have explicit links showing which proposal/validation + from Node A caused processing on Node B. 
+ +## Prerequisites + +- Phase 4a (this task list) — establish phase tracing must be in place +- `TraceContextPropagator` class (already exists in + `include/xrpl/telemetry/TraceContextPropagator.h`) +- Protobuf `TraceContext` message (already exists, field 1001) diff --git a/cspell.config.yaml b/cspell.config.yaml index 67053694e7..04b759cd61 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -214,6 +214,7 @@ words: - qalloc - queuable - Raphson + - reparent - replayer - rerere - retriable diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml index 682e1bbb13..1c372461d7 100644 --- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml +++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml @@ -8,6 +8,7 @@ # Phase 1b (infra): Base filters — node identity, service, span name, status. # Phase 2 (RPC): RPC command, status, role filters. # Phase 3 (TX): Transaction hash, local/peer origin, status. +# Phase 4 (Cons): Consensus mode, round, ledger sequence, close time. 
apiVersion: 1 @@ -113,3 +114,34 @@ datasources: operator: "=" scope: span type: dynamic + # Phase 4: Consensus tracing filters + - id: consensus-mode + tag: xrpl.consensus.mode + operator: "=" + scope: span + type: static + - id: consensus-round + tag: xrpl.consensus.round + operator: "=" + scope: span + type: dynamic + - id: consensus-ledger-seq + tag: xrpl.consensus.ledger.seq + operator: "=" + scope: span + type: static + - id: consensus-close-time-correct + tag: xrpl.consensus.close_time_correct + operator: "=" + scope: span + type: dynamic + - id: consensus-state + tag: xrpl.consensus.state + operator: "=" + scope: span + type: dynamic + - id: consensus-close-resolution + tag: xrpl.consensus.close_resolution_ms + operator: "=" + scope: span + type: dynamic diff --git a/include/xrpl/telemetry/SpanGuard.h b/include/xrpl/telemetry/SpanGuard.h index 07ad8e9ae7..7629e389a5 100644 --- a/include/xrpl/telemetry/SpanGuard.h +++ b/include/xrpl/telemetry/SpanGuard.h @@ -123,6 +123,25 @@ public: span_->AddEvent(std::string(name)); } + /** Add a named event with key-value attributes to the span. + + Allows attaching structured metadata to a point-in-time event on + the span timeline (e.g., "dispute.resolve" with transaction ID + and vote result attributes). + + @param name Event name (e.g., "dispute.resolve"). + @param attributes Key-value pairs describing the event. + */ + void + addEvent( + std::string_view name, + std::initializer_list< + std::pair> + attributes) + { + span_->AddEvent(std::string(name), attributes); + } + /** Record an exception as a span event following OTel semantic conventions, and mark the span status as error. 
diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index 0a21aa2c90..780ee57cd9 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -27,10 +27,15 @@ #include #ifdef XRPL_ENABLE_TELEMETRY +#include #include #include #include +#include #include + +#include +#include #endif namespace xrpl { @@ -104,6 +109,17 @@ public: /** Enable tracing for ledger close/accept. */ bool traceLedger = true; + + /** Cross-node correlation strategy for consensus tracing. + + "deterministic" derives trace_id from previousLedger.id() so all + nodes participating in the same consensus round share the same + trace_id, enabling cross-node trace correlation in the backend. + + "attribute" uses normal random trace_id with the ledger_id stored + as a span attribute; correlation must be done via attribute queries. + */ + std::string consensusTraceStrategy = "deterministic"; }; virtual ~Telemetry() = default; @@ -161,6 +177,18 @@ public: virtual bool shouldTraceLedger() const = 0; + /** @return The consensus trace correlation strategy. + + "deterministic" derives trace_id from previousLedger.id() so all + nodes participating in the same consensus round share the same + trace_id, enabling cross-node trace correlation in the backend. + + "attribute" uses normal random trace_id with the ledger_id stored + as a span attribute; correlation must be done via attribute queries. + */ + virtual std::string const& + getConsensusTraceStrategy() const = 0; + #ifdef XRPL_ENABLE_TELEMETRY /** Get or create a named tracer instance. @@ -199,6 +227,30 @@ public: std::string_view name, opentelemetry::context::Context const& parentContext, opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0; + + /** Start a new span with an explicit parent context and span links. + + Span links establish follows-from relationships without implying + a parent-child hierarchy. 
Common uses include linking consensus + round N+1 to round N, or linking a validation span back to the + round that produced it. + + @param name Span name. + @param parentContext The parent span's context. + @param links Vector of (SpanContext, attributes) pairs + for follows-from relationships. + @param kind The span kind (defaults to kInternal). + @return A shared pointer to the new Span. + */ + virtual opentelemetry::nostd::shared_ptr + startSpan( + std::string_view name, + opentelemetry::context::Context const& parentContext, + std::vector>>> const& + links, + opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0; #endif }; diff --git a/src/libxrpl/telemetry/NullTelemetry.cpp b/src/libxrpl/telemetry/NullTelemetry.cpp index 64c8f5e491..62404c6ce5 100644 --- a/src/libxrpl/telemetry/NullTelemetry.cpp +++ b/src/libxrpl/telemetry/NullTelemetry.cpp @@ -13,7 +13,9 @@ #include #ifdef XRPL_ENABLE_TELEMETRY +#include #include +#include #endif namespace xrpl { @@ -82,6 +84,12 @@ public: return false; } + std::string const& + getConsensusTraceStrategy() const override + { + return setup_.consensusTraceStrategy; + } + #ifdef XRPL_ENABLE_TELEMETRY opentelemetry::nostd::shared_ptr getTracer(std::string_view) override @@ -107,6 +115,20 @@ public: return opentelemetry::nostd::shared_ptr( new opentelemetry::trace::NoopSpan(nullptr)); } + + /** No-op: returns a NoopSpan, ignoring links. 
*/ + opentelemetry::nostd::shared_ptr + startSpan( + std::string_view, + opentelemetry::context::Context const&, + std::vector>>> const&, + opentelemetry::trace::SpanKind) override + { + return opentelemetry::nostd::shared_ptr( + new opentelemetry::trace::NoopSpan(nullptr)); + } #endif }; diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp index 8f705726ca..0ae95da809 100644 --- a/src/libxrpl/telemetry/Telemetry.cpp +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -26,6 +27,9 @@ #include #include #include +#include + +#include namespace xrpl { namespace telemetry { @@ -99,6 +103,12 @@ public: return false; } + std::string const& + getConsensusTraceStrategy() const override + { + return setup_.consensusTraceStrategy; + } + opentelemetry::nostd::shared_ptr getTracer(std::string_view) override { @@ -119,6 +129,19 @@ public: { return opentelemetry::nostd::shared_ptr(new trace_api::NoopSpan(nullptr)); } + + /** No-op: returns a NoopSpan, ignoring links. */ + opentelemetry::nostd::shared_ptr + startSpan( + std::string_view, + opentelemetry::context::Context const&, + std::vector>>> const&, + trace_api::SpanKind) override + { + return opentelemetry::nostd::shared_ptr(new trace_api::NoopSpan(nullptr)); + } }; /** Full OTel SDK implementation that exports trace spans via OTLP/HTTP. @@ -253,6 +276,12 @@ public: return setup_.traceLedger; } + std::string const& + getConsensusTraceStrategy() const override + { + return setup_.consensusTraceStrategy; + } + opentelemetry::nostd::shared_ptr getTracer(std::string_view name) override { @@ -282,6 +311,41 @@ public: opts.parent = parentContext; return tracer->StartSpan(std::string(name), opts); } + + /** Start a span with explicit parent context and span links. 
+ + Links are passed as the third argument to Tracer::StartSpan(), + which accepts any type satisfying is_span_context_kv_iterable + (a container of pairs where .first is SpanContext and .second is + a key-value iterable). + + @param name Span name. + @param parentContext The parent span's context. + @param links Span links for follows-from relationships. + @param kind The span kind. + @return A shared pointer to the new Span. + */ + opentelemetry::nostd::shared_ptr + startSpan( + std::string_view name, + opentelemetry::context::Context const& parentContext, + std::vector>>> const& + links, + trace_api::SpanKind kind) override + { + auto tracer = getTracer("rippled"); + trace_api::StartSpanOptions opts; + opts.kind = kind; + opts.parent = parentContext; + // Links are passed as a separate parameter to StartSpan; + // the SDK wraps them in a SpanContextKeyValueIterableView. + // Empty attributes map is passed explicitly to select the + // template overload that accepts (name, attributes, links, opts). + std::map emptyAttrs; + return tracer->StartSpan(std::string(name), emptyAttrs, links, opts); + } }; } // namespace diff --git a/src/libxrpl/telemetry/TelemetryConfig.cpp b/src/libxrpl/telemetry/TelemetryConfig.cpp index 2cc74d1a4e..b05506dccf 100644 --- a/src/libxrpl/telemetry/TelemetryConfig.cpp +++ b/src/libxrpl/telemetry/TelemetryConfig.cpp @@ -47,6 +47,19 @@ setup_Telemetry( setup.tracePeer = section.value_or("trace_peer", 0) != 0; setup.traceLedger = section.value_or("trace_ledger", 1) != 0; + // Consensus tracing strategy: "deterministic" (shared trace_id derived + // from previousLedger.id()) or "attribute" (random trace_id with + // ledger_id stored as a span attribute). + setup.consensusTraceStrategy = + section.value_or("consensus_trace_strategy", "deterministic"); + + if (setup.consensusTraceStrategy != "deterministic" && + setup.consensusTraceStrategy != "attribute") + { + // Fall back to default if the value is unrecognised. 
+ setup.consensusTraceStrategy = "deterministic"; + } + return setup; } diff --git a/src/test/csf/Peer.h b/src/test/csf/Peer.h index c36d600e6c..3ae613ef1a 100644 --- a/src/test/csf/Peer.h +++ b/src/test/csf/Peer.h @@ -11,6 +11,10 @@ #include #include +#ifdef XRPL_ENABLE_TELEMETRY +#include +#endif + #include #include @@ -618,6 +622,22 @@ struct Peer { } +#ifdef XRPL_ENABLE_TELEMETRY + /** Provide telemetry access for the Consensus template. + * + * The test Peer adaptor uses a static disabled NullTelemetry instance + * so that all shouldTrace*() checks return false and no spans are + * created during simulation tests. + */ + telemetry::Telemetry& + getTelemetry() + { + static auto tel = make_Telemetry( + telemetry::Telemetry::Setup{}, beast::Journal{beast::Journal::getNullSink()}); + return *tel; + } +#endif + // Share a message by broadcasting to all connected peers template void diff --git a/src/tests/libxrpl/telemetry/TracingMacros.cpp b/src/tests/libxrpl/telemetry/TracingMacros.cpp index a8c1bb5e86..c65fb92488 100644 --- a/src/tests/libxrpl/telemetry/TracingMacros.cpp +++ b/src/tests/libxrpl/telemetry/TracingMacros.cpp @@ -82,6 +82,35 @@ TEST(TracingMacros, conditional_guards) } } +TEST(TracingMacros, consensus_close_time_attributes) +{ + // Verify the consensus.accept.apply attribute pattern compiles and + // doesn't crash with NullTelemetry. Mirrors the real instrumentation + // in RCLConsensus::Adaptor::doAccept(). 
+ telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + + { + XRPL_TRACE_CONSENSUS(*tel, "consensus.accept.apply"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", static_cast(42)); + XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time", static_cast(780000000)); + XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time_correct", true); + XRPL_TRACE_SET_ATTR("xrpl.consensus.close_resolution_ms", static_cast(30000)); + XRPL_TRACE_SET_ATTR("xrpl.consensus.state", std::string("finished")); + XRPL_TRACE_SET_ATTR("xrpl.consensus.proposing", true); + XRPL_TRACE_SET_ATTR("xrpl.consensus.round_time_ms", static_cast(3500)); + } + // close_time_correct=false path (agreed to disagree) + { + XRPL_TRACE_CONSENSUS(*tel, "consensus.accept.apply"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time_correct", false); + XRPL_TRACE_SET_ATTR("xrpl.consensus.state", std::string("moved_on")); + } +} + #ifdef XRPL_ENABLE_TELEMETRY TEST(TracingMacros, span_guard_raii) diff --git a/src/xrpld/app/consensus/RCLConsensus.cpp b/src/xrpld/app/consensus/RCLConsensus.cpp index 315f9f2ab5..c567e52416 100644 --- a/src/xrpld/app/consensus/RCLConsensus.cpp +++ b/src/xrpld/app/consensus/RCLConsensus.cpp @@ -14,8 +14,19 @@ #include #include #include +#include #include +#ifdef XRPL_ENABLE_TELEMETRY +#include +#include + +#include +#include +#include +#include +#include +#endif #include #include #include @@ -32,6 +43,57 @@ namespace xrpl { +#ifdef XRPL_ENABLE_TELEMETRY +namespace { + +/** Create an OTel context with a deterministic trace ID. + * + * Derives the trace_id from the first 16 bytes of a uint256 ledger hash + * so that all validators participating in the same consensus round + * produce spans sharing the same trace_id. This enables cross-node + * trace correlation in the backend without requiring explicit context + * propagation over the peer protocol. 
+ * + * The span_id is randomly generated (8 bytes from the CSPRNG) so each + * validator's root span is unique within the shared trace. + * + * @param ledgerId The previousLedger.id() hash for the consensus round. + * @return An OTel Context containing a synthetic parent span with the + * deterministic trace_id and a random span_id. + */ +opentelemetry::context::Context +createDeterministicContext(uint256 const& ledgerId) +{ + namespace trace = opentelemetry::trace; + + // Use first 16 bytes of the 256-bit ledger hash as trace ID. + // uint256::data() returns a const uint8_t* to 32 bytes in + // big-endian order; the first 16 are the most-significant half. + trace::TraceId traceId(opentelemetry::nostd::span(ledgerId.data(), 16)); + + // Generate a random 8-byte span ID using the crypto PRNG. + uint8_t spanIdBytes[8]; + crypto_prng()(spanIdBytes, sizeof(spanIdBytes)); + trace::SpanId spanId(opentelemetry::nostd::span(spanIdBytes, 8)); + + // Build a synthetic SpanContext that is sampled (flag 0x01) + // and not remote (originated locally). + trace::SpanContext syntheticCtx( + traceId, + spanId, + trace::TraceFlags(1), + /* remote = */ false); + + // Wrap in a DefaultSpan and set on an empty Context via the + // standard kSpanKey used by the OTel SDK for context propagation. + return opentelemetry::context::Context{}.SetValue( + trace::kSpanKey, + opentelemetry::nostd::shared_ptr(new trace::DefaultSpan(syntheticCtx))); +} + +} // namespace +#endif // XRPL_ENABLE_TELEMETRY + RCLConsensus::RCLConsensus( Application& app, std::unique_ptr&& feeVote, @@ -171,6 +233,9 @@ RCLConsensus::Adaptor::share(RCLCxTx const& tx) void RCLConsensus::Adaptor::propose(RCLCxPeerPos::Proposal const& proposal) { + XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.proposal.send"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.round", static_cast(proposal.proposeSeq())); + JLOG(j_.trace()) << (proposal.isBowOut() ? 
"We bow out: " : "We propose: ") << xrpl::to_string(proposal.prevLedger()) << " -> " << xrpl::to_string(proposal.position()); @@ -273,6 +338,11 @@ RCLConsensus::Adaptor::onClose( NetClock::time_point const& closeTime, ConsensusMode mode) -> Result { + XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.ledger_close"); + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.ledger.seq", static_cast(ledger.ledger_->header().seq + 1)); + XRPL_TRACE_SET_ATTR("xrpl.consensus.mode", to_string(mode).c_str()); + bool const wrongLCL = mode == ConsensusMode::wrongLedger; bool const proposing = mode == ConsensusMode::proposing; @@ -381,6 +451,11 @@ RCLConsensus::Adaptor::onAccept( Json::Value&& consensusJson, bool const validating) { + XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.accept"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.proposers", static_cast(result.proposers)); + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.round_time_ms", static_cast(result.roundTime.read().count())); + app_.getJobQueue().addJob( jtACCEPT, "AcceptLedger", @@ -432,6 +507,57 @@ RCLConsensus::Adaptor::doAccept( closeTimeCorrect = true; } + /// @note This method runs on a JobQueue worker thread (jtACCEPT), not the + /// consensus thread where roundSpan_ is active. OTel's thread-local + /// context propagation does NOT cross thread boundaries, so the + /// consensus.accept.apply span below is standalone — it is NOT a child + /// of consensus.round. Cross-thread context propagation for this path + /// is a future enhancement (Phase 4b). + + // Trace the ledger application phase with close time details. + // This span runs on the jtACCEPT job queue thread (posted by onAccept), + // separate from the consensus.accept span which fires synchronously in + // onAccept. 
It captures the agreed-upon close time, whether validators + // converged on it (per avCT_CONSENSUS_PCT), the consensus outcome, + // parent close time, this node's own close time proposal, the number + // of distinct vote bins, and the resolution adaptation direction. + XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.accept.apply"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", static_cast(prevLedger.seq() + 1)); + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.close_time", + static_cast(consensusCloseTime.time_since_epoch().count())); + XRPL_TRACE_SET_ATTR("xrpl.consensus.close_time_correct", closeTimeCorrect); + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.close_resolution_ms", + static_cast( + std::chrono::duration_cast(closeResolution).count())); + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.state", std::string(consensusFail ? "moved_on" : "finished")); + XRPL_TRACE_SET_ATTR("xrpl.consensus.proposing", proposing); + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.round_time_ms", static_cast(result.roundTime.read().count())); + // Parent ledger's close time — enables computing close-time deltas across + // consecutive rounds without correlating separate spans. + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.parent_close_time", + static_cast(prevLedger.closeTime().time_since_epoch().count())); + // This node's own proposed close time before consensus voting. + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.close_time_self", + static_cast(rawCloseTimes.self.time_since_epoch().count())); + // Number of distinct close-time vote bins from peer proposals. + XRPL_TRACE_SET_ATTR( + "xrpl.consensus.close_time_vote_bins", static_cast(rawCloseTimes.peers.size())); + // Whether close-time resolution increased (coarser), decreased (finer), + // or stayed the same relative to the previous ledger. + { + auto const prevRes = prevLedger.closeTimeResolution(); + std::string dir = (closeResolution > prevRes) ? "increased" + : (closeResolution < prevRes) ? 
"decreased" + : "unchanged"; + XRPL_TRACE_SET_ATTR("xrpl.consensus.resolution_direction", std::move(dir)); + } + JLOG(j_.debug()) << "Report: Prop=" << (proposing ? "yes" : "no") << " val=" << (validating_ ? "yes" : "no") << " corLCL=" << (haveCorrectLCL ? "yes" : "no") @@ -749,6 +875,17 @@ RCLConsensus::Adaptor::buildLCL( void RCLConsensus::Adaptor::validate(RCLCxLedger const& ledger, RCLTxSet const& txns, bool proposing) { + /// @note This method is called from doAccept(), which runs on a JobQueue + /// worker thread (jtACCEPT). The consensus.validation.send span is + /// therefore standalone — NOT a child of consensus.round. A span link + /// to the round span is added below to establish the follows-from + /// relationship without requiring parent-child context propagation. +#ifdef XRPL_ENABLE_TELEMETRY + std::optional _xrpl_guard_ = createValidationSpan(); +#endif + XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", static_cast(ledger.seq())); + XRPL_TRACE_SET_ATTR("xrpl.consensus.proposing", proposing); + using namespace std::chrono_literals; auto validationTime = app_.timeKeeper().closeTime(); @@ -836,6 +973,13 @@ RCLConsensus::Adaptor::validate(RCLCxLedger const& ledger, RCLTxSet const& txns, void RCLConsensus::Adaptor::onModeChange(ConsensusMode before, ConsensusMode after) { + // Trace mode transitions as short-lived spans for visibility in the + // trace backend. Each transition (e.g. observing -> proposing) appears + // as a child of the current consensus.round span. 
+ XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.mode_change"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.old", to_string(before).c_str()); + XRPL_TRACE_SET_ATTR("xrpl.consensus.mode.new", to_string(after).c_str()); + JLOG(j_.info()) << "Consensus mode change before=" << to_string(before) << ", after=" << to_string(after); @@ -958,6 +1102,10 @@ RCLConsensus::Adaptor::preStartRound(RCLCxLedger const& prevLgr, hash_setcontext(); + roundSpan_.reset(); + } + + auto& tel = app_.getTelemetry(); + if (!tel.shouldTraceConsensus()) + return; + + auto const& strategy = tel.getConsensusTraceStrategy(); + + // Build span links to previous round (follows-from) if available. + // This creates a causal chain between consecutive consensus rounds + // in the trace backend. + using LinkAttr = std::pair; + using SpanLink = std::pair>; + std::vector links; + + auto prevSpan = opentelemetry::trace::GetSpan(prevRoundContext_); + if (prevSpan && prevSpan->GetContext().IsValid()) + { + links.emplace_back( + prevSpan->GetContext(), + std::vector{{"xrpl.link.type", std::string("follows_from")}}); + } + + if (strategy == "deterministic") + { + // Derive trace_id from ledger hash so all validators in this + // round produce spans under the same trace. + auto parentCtx = createDeterministicContext(prevLgr.id()); + roundSpan_.emplace(tel.startSpan("consensus.round", parentCtx, links)); + } + else + { + // "attribute" strategy: random trace_id, correlation via + // the xrpl.consensus.ledger_id attribute. + if (links.empty()) + roundSpan_.emplace(tel.startSpan("consensus.round")); + else + { + // Use an empty context as parent (new root trace). + roundSpan_.emplace( + tel.startSpan("consensus.round", opentelemetry::context::Context{}, links)); + } + } + + // Set standard attributes on the round span. 
+ roundSpan_->setAttribute("xrpl.consensus.ledger_id", to_string(prevLgr.id()).c_str()); + roundSpan_->setAttribute("xrpl.consensus.ledger.seq", static_cast(prevLgr.seq() + 1)); + roundSpan_->setAttribute("xrpl.consensus.mode", to_string(mode_.load()).c_str()); + roundSpan_->setAttribute("xrpl.consensus.trace_strategy", strategy.c_str()); + roundSpan_->setAttribute("xrpl.consensus.round_id", static_cast(prevLgr.seq() + 1)); + + // Snapshot the SpanContext for cross-thread use by createValidationSpan(). + roundSpanContext_ = roundSpan_->span().GetContext(); +} + +std::optional +RCLConsensus::Adaptor::createValidationSpan() +{ + if (!app_.getTelemetry().shouldTraceConsensus()) + return std::nullopt; + + // Build span link to the round span (follows-from relationship). + // The validation is triggered by the round but executes on a + // different thread and may outlive the round span. + std::vector>>> + links; + + // Use the snapshotted SpanContext (set on consensus thread in + // startRoundTracing) rather than accessing roundSpan_ directly, + // since this method runs on the jtACCEPT worker thread. 
+ if (roundSpanContext_ && roundSpanContext_->IsValid()) + { + links.push_back({*roundSpanContext_, {}}); + } + + return telemetry::SpanGuard(app_.getTelemetry().startSpan( + "consensus.validation.send", opentelemetry::context::RuntimeContext::GetCurrent(), links)); +} +#endif + void RCLConsensus::startRound( NetClock::time_point const& now, diff --git a/src/xrpld/app/consensus/RCLConsensus.h b/src/xrpld/app/consensus/RCLConsensus.h index 15d36a1aa6..f3d72bd5c0 100644 --- a/src/xrpld/app/consensus/RCLConsensus.h +++ b/src/xrpld/app/consensus/RCLConsensus.h @@ -13,9 +13,16 @@ #include #include +#ifdef XRPL_ENABLE_TELEMETRY +#include + +#include +#endif + #include #include #include +#include #include #include #include @@ -27,6 +34,10 @@ class LocalTxs; class LedgerMaster; class ValidatorKeys; +namespace telemetry { +class Telemetry; +} // namespace telemetry + /** Manages the generic consensus algorithm for use by the RCL. */ class RCLConsensus @@ -68,6 +79,34 @@ class RCLConsensus RCLCensorshipDetector censorshipDetector_; NegativeUNLVote nUnlVote_; +#ifdef XRPL_ENABLE_TELEMETRY + /** Span for the current consensus round. + * + * Created in preStartRound(), ended (via reset()) when the next + * round begins. When consensusTraceStrategy is "deterministic", + * the trace_id is derived from previousLedger.id() so that all + * validators in the same round share the same trace_id. + */ + std::optional roundSpan_; + + /** Context captured from the previous consensus round. + * + * Used to create span links (follows-from) between consecutive + * rounds, establishing a causal chain in the trace backend. + * Default-constructed (empty) until the first round completes. + */ + opentelemetry::context::Context prevRoundContext_; + + /** SpanContext snapshot of the current round span. 
+ * + * Captured in startRoundTracing() as a lightweight value-type copy + * so that createValidationSpan() — which runs on the jtACCEPT + * worker thread — can build span links without accessing roundSpan_ + * across threads. + */ + std::optional roundSpanContext_; +#endif + public: using Ledger_t = RCLCxLedger; using NodeID_t = NodeID; @@ -156,6 +195,51 @@ class RCLConsensus return parms_; } +#ifdef XRPL_ENABLE_TELEMETRY + /** Provide access to the telemetry subsystem for consensus tracing. + * + * Called by Consensus.h template methods (phaseEstablish, + * updateOurPositions, haveConsensus) to create child spans under the + * consensus round. When XRPL_ENABLE_TELEMETRY is not defined, the + * macros in Consensus.h expand to no-ops and this method is never + * called. + * + * @return Reference to the application's Telemetry instance. + */ + telemetry::Telemetry& + getTelemetry(); + + /** Set up the consensus round span and link it to the previous round. + * + * Extracted from preStartRound() to keep business logic free of + * telemetry details. Saves the previous round's OTel context for + * span-link construction, ends the old round span, and creates a + * new "consensus.round" span. Depending on the configured trace + * strategy the trace_id is either deterministic (derived from + * @p prevLgr hash) or random. + * + * @param prevLgr The ledger that will be the prior ledger for the + * new round — used to derive deterministic trace IDs + * and to set standard span attributes. + */ + void + startRoundTracing(RCLCxLedger const& prevLgr); + + /** Create the "consensus.validation.send" span with a link to the + * current round span. + * + * Extracted from validate() to keep the validation business logic + * free of span-construction boilerplate. The returned SpanGuard + * must be assigned to a local `_xrpl_guard_` so that subsequent + * XRPL_TRACE_SET_ATTR calls in the caller can reference it. 
+ * + * @return An engaged optional SpanGuard if tracing is active, + * std::nullopt otherwise. + */ + std::optional + createValidationSpan(); +#endif + private: //--------------------------------------------------------------------- // The following members implement the generic Consensus requirements diff --git a/src/xrpld/consensus/Consensus.h b/src/xrpld/consensus/Consensus.h index 3c1e4e7dbf..884efdcf5d 100644 --- a/src/xrpld/consensus/Consensus.h +++ b/src/xrpld/consensus/Consensus.h @@ -11,6 +11,12 @@ #include #include +#ifdef XRPL_ENABLE_TELEMETRY +#include + +#include +#endif + #include #include #include @@ -601,6 +607,44 @@ private: // nodes that have bowed out of this consensus process hash_set deadNodes_; +#ifdef XRPL_ENABLE_TELEMETRY + /** Span for the establish phase of consensus. + * + * Created when the ledger closes and we enter phaseEstablish; + * cleared (ended) when consensus is reached and we move to the + * accept phase. This span is a child of the round span that + * lives in the Adaptor (via thread-local OTel context propagation). + */ + std::optional establishSpan_; + + /** Create the establish-phase span if not yet active. + * + * Called on each phaseEstablish() invocation. Creates a + * "consensus.establish" span on the first call and stores it in + * establishSpan_. Subsequent calls are no-ops while the span is + * still live. + */ + void + startEstablishTracing(); + + /** Update establish span attributes for the current iteration. + * + * Overwrites convergence metrics (converge_percent, establish_count, + * proposers) on each call so the final span always reflects the last + * state before consensus was reached. + */ + void + updateEstablishTracing(); + + /** End the establish span when transitioning to the accepted phase. + * + * Resets establishSpan_, which triggers the SpanGuard destructor and + * ends the span. 
+     */
+    void
+    endEstablishTracing();
+#endif
+
     // Journal for debugging
     beast::Journal const j_;
 };
@@ -1301,6 +1345,10 @@ Consensus::phaseEstablish(std::unique_ptr const& clo
     // can only establish consensus if we already took a stance
     XRPL_ASSERT(result_, "xrpl::Consensus::phaseEstablish : result is set");
 
+#ifdef XRPL_ENABLE_TELEMETRY
+    startEstablishTracing();
+#endif
+
     ++peerUnchangedCounter_;
     ++establishCounter_;
 
@@ -1318,6 +1366,10 @@ Consensus::phaseEstablish(std::unique_ptr const& clo
                << "previous round duration: " << prevRoundTime_.count() << "ms, "
                << "avMIN_CONSENSUS_TIME: " << parms.avMIN_CONSENSUS_TIME.count() << "ms. ";
 
+#ifdef XRPL_ENABLE_TELEMETRY
+    updateEstablishTracing();
+#endif
+
     // Give everyone a chance to take an initial position
     if (result_->roundTime.read() < parms.ledgerMIN_CONSENSUS)
     {
@@ -1345,6 +1397,11 @@ Consensus::phaseEstablish(std::unique_ptr const& clo
     adaptor_.updateOperatingMode(currPeerPositions_.size());
     prevProposers_ = currPeerPositions_.size();
     prevRoundTime_ = result_->roundTime.read();
+
+#ifdef XRPL_ENABLE_TELEMETRY
+    endEstablishTracing();
+#endif
+
     phase_ = ConsensusPhase::accepted;
     JLOG(j_.debug()) << "transitioned to ConsensusPhase::accepted";
     adaptor_.onAccept(
@@ -1357,6 +1414,46 @@ Consensus::phaseEstablish(std::unique_ptr const& clo
         adaptor_.validating());
 }
 
+#ifdef XRPL_ENABLE_TELEMETRY
+/// Open the "consensus.establish" span when none is open yet and the
+/// telemetry object reports that consensus tracing is wanted.
+template
+void
+Consensus::startEstablishTracing()
+{
+    if (!establishSpan_ && adaptor_.getTelemetry().shouldTraceConsensus())
+    {
+        establishSpan_.emplace(adaptor_.getTelemetry().startSpan("consensus.establish"));
+    }
+}
+
+/// Refresh the establish span's progress attributes (converge percent,
+/// establish count, proposer count); does nothing when no span is open.
+template
+void
+Consensus::updateEstablishTracing()
+{
+    if (establishSpan_)
+    {
+        establishSpan_->setAttribute(
+            "xrpl.consensus.converge_percent", static_cast(convergePercent_));
+        establishSpan_->setAttribute(
+            "xrpl.consensus.establish_count", static_cast(establishCounter_));
+        establishSpan_->setAttribute(
+            "xrpl.consensus.proposers", static_cast(currPeerPositions_.size()));
+    }
+}
+
+/// Drop the establish span object. NOTE(review): presumably this ends the
+/// span via the held object's destructor -- confirm against the span type.
+template
+void
+Consensus::endEstablishTracing()
+{
+    establishSpan_.reset();
+}
+#endif // XRPL_ENABLE_TELEMETRY
+
 template
 void
 Consensus::closeLedger(std::unique_ptr const& clog)
@@ -1419,6 +1516,34 @@ Consensus::updateOurPositions(std::unique_ptr const&
 {
     // We must have a position if we are updating it
     XRPL_ASSERT(result_, "xrpl::Consensus::updateOurPositions : result is set");
+
+    /// @brief Scoped span tracking a single position-update pass.
+    /// Records the number of active disputes, current convergence
+    /// percentage, and total proposers. Dispute resolution events are
+    /// recorded as span events with the affected transaction ID and vote.
+    XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.update_positions");
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.disputes_count", static_cast(result_->disputes.size()));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.converge_percent", static_cast(convergePercent_));
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.proposers_total", static_cast(currPeerPositions_.size()));
+
+    /// Count peers that agree with our current position and record as
+    /// an attribute on the update_positions span. Guarded so builds
+    /// without telemetry neither run the scan nor see an unused counter.
+#ifdef XRPL_ENABLE_TELEMETRY
+    {
+        int agreedCount = 0;
+        auto const ourPos = result_->position.position();
+        for (auto const& [nodeId, peerPos] : currPeerPositions_)
+        {
+            if (peerPos.proposal().position() == ourPos)
+                ++agreedCount;
+        }
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.proposers_agreed", static_cast(agreedCount));
+    }
+#endif
+
     ConsensusParms const& parms = adaptor_.parms();
 
     // Compute a cutoff time
@@ -1465,6 +1590,15 @@ Consensus::updateOurPositions(std::unique_ptr const&
             if (dispute.updateVote(
                     convergePercent_, mode_.get() == ConsensusMode::proposing, parms))
             {
+                /// Record dispute resolution event with transaction ID,
+                /// new vote direction, and current yay/nay counts.
+                XRPL_TRACE_ADD_EVENT(
+                    "dispute.resolve",
+                    {{"xrpl.dispute.tx_id", to_string(txId)},
+                     {"xrpl.dispute.our_vote", dispute.getOurVote()},
+                     {"xrpl.dispute.yays", static_cast(dispute.getYays())},
+                     {"xrpl.dispute.nays", static_cast(dispute.getNays())}});
+
                 if (!mutableSet)
                     mutableSet.emplace(result_->txns);
 
@@ -1600,6 +1734,12 @@ Consensus::haveConsensus(std::unique_ptr const& clog
     // Must have a stance if we are checking for consensus
     XRPL_ASSERT(result_, "xrpl::Consensus::haveConsensus : has result");
 
+    /// @brief Scoped span tracking a single consensus-check pass.
+    /// Records the number of agreeing/disagreeing peers, convergence
+    /// percentage, and the resulting ConsensusState (Yes/No/MovedOn/Expired).
+    /// Also captures parms.minCONSENSUS_PCT as the threshold percentage.
+    XRPL_TRACE_CONSENSUS(adaptor_.getTelemetry(), "consensus.check");
+
     // CHECKME: should possibly count unacquired TX sets as disagreeing
     int agree = 0, disagree = 0;
 
@@ -1620,11 +1760,23 @@ Consensus::haveConsensus(std::unique_ptr const& clog
             ++disagree;
         }
     }
+
+    /// Record agreement counts and convergence progress on the span.
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.agree_count", static_cast(agree));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.disagree_count", static_cast(disagree));
+    XRPL_TRACE_SET_ATTR("xrpl.consensus.converge_percent", static_cast(convergePercent_));
+
     auto currentFinished = adaptor_.proposersFinished(previousLedger_, prevLedgerID_);
 
     JLOG(j_.debug()) << "Checking for TX consensus: agree=" << agree << ", disagree=" << disagree;
 
     ConsensusParms const& parms = adaptor_.parms();
+
+    /// Record the minimum consensus threshold percentage (typically 80%).
+    /// NOTE(review): design doc lists 50/65/70/95 for this key -- confirm intent.
+    XRPL_TRACE_SET_ATTR(
+        "xrpl.consensus.threshold_percent", static_cast(parms.minCONSENSUS_PCT));
+
     // Stalling is BAD. It means that we have a consensus on the close time, so
     // peers are talking, but we have disputed transactions that peers are
     // unable or unwilling to come to agreement on one way or the other.
@@ -1657,6 +1809,30 @@ Consensus::haveConsensus(std::unique_ptr const& clog
         j_,
         clog);
 
+    /// Record the consensus check outcome as a string attribute.
+    /// Guarded: the string is consumed only by the telemetry macro below.
+#ifdef XRPL_ENABLE_TELEMETRY
+    {
+        char const* stateStr = "unknown";
+        switch (result_->state)
+        {
+            case ConsensusState::No:
+                stateStr = "no";
+                break;
+            case ConsensusState::MovedOn:
+                stateStr = "moved_on";
+                break;
+            case ConsensusState::Yes:
+                stateStr = "yes";
+                break;
+            case ConsensusState::Expired:
+                stateStr = "expired";
+                break;
+        }
+        XRPL_TRACE_SET_ATTR("xrpl.consensus.result", stateStr);
+    }
+#endif
+
     if (result_->state == ConsensusState::No)
     {
         CLOG(clog) << "No consensus. ";
diff --git a/src/xrpld/consensus/DisputedTx.h b/src/xrpld/consensus/DisputedTx.h
index 89cb5115bb..eb987f693d 100644
--- a/src/xrpld/consensus/DisputedTx.h
+++ b/src/xrpld/consensus/DisputedTx.h
@@ -58,6 +58,20 @@ public:
         return ourVote_;
     }
 
+    //! Number of peers voting to include the transaction.
+    [[nodiscard]] int
+    getYays() const
+    {
+        return yays_;
+    }
+
+    //! Number of peers voting to exclude the transaction.
+    [[nodiscard]] int
+    getNays() const
+    {
+        return nays_;
+    }
+
     //! Are we and our peers "stalled" where we probably won't change
     //! our vote?
     bool
diff --git a/src/xrpld/telemetry/TracingInstrumentation.h b/src/xrpld/telemetry/TracingInstrumentation.h
index 39177ea95e..8363a8ba21 100644
--- a/src/xrpld/telemetry/TracingInstrumentation.h
+++ b/src/xrpld/telemetry/TracingInstrumentation.h
@@ -123,6 +123,27 @@ namespace telemetry {
         _xrpl_guard_->recordException(e); \
     }
 
+/** Add a named event with attributes to the current trace span.
+
+    Uses the `_xrpl_guard_` local variable created by XRPL_TRACE_* macros.
+    No-op when no span is active; the arguments are then not evaluated.
+    Example:
+    @code
+    XRPL_TRACE_ADD_EVENT("dispute.resolve", {
+        {"xrpl.dispute.tx_id", std::string(tx_id)},
+        {"xrpl.dispute.our_vote", our_vote}
+    });
+    @endcode
+*/
+#define XRPL_TRACE_ADD_EVENT(name, ...)                \
+    do                                                 \
+    {                                                  \
+        if (_xrpl_guard_.has_value())                  \
+        {                                              \
+            _xrpl_guard_->addEvent(name, __VA_ARGS__); \
+        }                                              \
+    } while (0)
+
 } // namespace telemetry
 } // namespace xrpl
 
@@ -137,5 +158,6 @@ namespace telemetry {
 #define XRPL_TRACE_LEDGER(_tel_obj_, _span_name_) ((void)0)
 #define XRPL_TRACE_SET_ATTR(key, value) ((void)0)
 #define XRPL_TRACE_EXCEPTION(e) ((void)0)
+#define XRPL_TRACE_ADD_EVENT(name, ...) ((void)0)
 
 #endif // XRPL_ENABLE_TELEMETRY