fix(telemetry): address Phase 2 code review findings

- Move node health attribute strings to compile-time constants in
  SpanNames.h (attr::nodeAmendmentBlocked, attr::nodeServerState)
- Add Tempo search filters for node health attributes
- Remove unnecessary .c_str() on strOperatingMode() return
- Add samplingRatio clamping test (values > 1.0 and < 0.0)
- Fix Task 2.3 status: delivered in Phase 1c, not Phase 2

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-04-20 15:41:23 +01:00
parent 810999a006
commit c59ea77bdc
5 changed files with 35 additions and 4 deletions

View File

@@ -167,13 +167,13 @@ This surfaces all RPCs served during a blocked period — critical for post-inci
| ---- | ------------------------------------------- | ------------------- | ------------------------------------------------ |
| 2.1 | W3C Trace Context header extraction | Deferred → Phase 3 | No consumer in Phase 2; needs cross-node tracing |
| 2.2 | Per-category span creation | Complete (Phase 1c) | Superseded by TraceCategory enum + SpanGuard |
| 2.3 | Add shouldTraceLedger() interface method | Complete | All 3 implementations present |
| 2.3 | Add shouldTraceLedger() interface method | Complete (Phase 1c) | Delivered in Phase 1c base branch |
| 2.4 | Unit tests for core telemetry | Complete | TelemetryConfig + SpanGuardFactory tests |
| 2.5 | Enhanced RPC span attributes (HTTP-level) | Deferred | Low value; span duration covers timing natively |
| 2.6 | Build verification and performance baseline | Complete | Verified in CI on Phase 1c |
| 2.7 | Grafana Tempo search filters | Complete | rpc-command, rpc-status, rpc-role filters |
| 2.8 | RPC span attribute enrichment (node health) | Complete | amendment_blocked + server_state |
**Delivered in this branch**: Tasks 2.3, 2.4, 2.6, 2.7, 2.8.
**Delivered in this branch**: Tasks 2.4, 2.7, 2.8.
**Deferred with rationale**: Tasks 2.1 (→Phase 3), 2.5 (low priority).
**Superseded**: Task 2.2 (Phase 1c SpanGuard factory covers this).

View File

@@ -106,3 +106,14 @@ datasources:
operator: "="
scope: span
type: dynamic
# Phase 2: Node health filters (Task 2.8)
- id: node-amendment-blocked
tag: xrpl.node.amendment_blocked
operator: "="
scope: span
type: static
- id: node-server-state
tag: xrpl.node.server_state
operator: "="
scope: span
type: dynamic

View File

@@ -100,6 +100,13 @@ namespace attr {
inline constexpr auto networkId = join(join(seg::xrpl, seg::network), makeStr("id"));
inline constexpr auto networkType = join(join(seg::xrpl, seg::network), makeStr("type"));
inline constexpr auto linkType = join(join(seg::xrpl, seg::link), makeStr("type"));
/// Node health attributes (cross-cutting, used by RPC/consensus/tx spans).
inline constexpr auto xrplNode = join(seg::xrpl, makeStr("node"));
/// "xrpl.node.amendment_blocked"
inline constexpr auto nodeAmendmentBlocked = join(xrplNode, makeStr("amendment_blocked"));
/// "xrpl.node.server_state"
inline constexpr auto nodeServerState = join(xrplNode, makeStr("server_state"));
} // namespace attr
// ===== Shared attribute values =============================================

View File

@@ -106,3 +106,16 @@ TEST(TelemetryConfig, null_telemetry_factory)
tel->start();
tel->stop();
}
TEST(TelemetryConfig, sampling_ratio_clamped)
{
Section section;
section.set("sampling_ratio", "2.5");
auto setup = telemetry::setup_Telemetry(section, "nHUtest123", "2.0.0", 0);
EXPECT_DOUBLE_EQ(setup.samplingRatio, 1.0);
Section section2;
section2.set("sampling_ratio", "-0.5");
auto setup2 = telemetry::setup_Telemetry(section2, "nHUtest123", "2.0.0", 0);
EXPECT_DOUBLE_EQ(setup2.samplingRatio, 0.0);
}

View File

@@ -167,8 +167,8 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
rpc_span::attr::role,
context.role == Role::ADMIN ? std::string_view(rpc_span::val::admin)
: std::string_view(rpc_span::val::user));
span.setAttribute("xrpl.node.amendment_blocked", context.app.getOPs().isAmendmentBlocked());
span.setAttribute("xrpl.node.server_state", context.app.getOPs().strOperatingMode().c_str());
span.setAttribute(attr::nodeAmendmentBlocked, context.app.getOPs().isAmendmentBlocked());
span.setAttribute(attr::nodeServerState, context.app.getOPs().strOperatingMode());
static std::atomic<std::uint64_t> requestId{0};
auto& perfLog = context.app.getPerfLog();