diff --git a/.github/workflows/telemetry-validation.yml b/.github/workflows/telemetry-validation.yml index 8b4bdcf952..e65505bf88 100644 --- a/.github/workflows/telemetry-validation.yml +++ b/.github/workflows/telemetry-validation.yml @@ -255,7 +255,12 @@ jobs: exit 1 fi - IS_PLACEHOLDER=$(jq -e -r '.placeholder == true or (.metrics | length == 0)' "$BASELINE") || { + # NOTE: do NOT use `jq -e` here. With -e, jq exits non-zero when the + # filter's result is boolean false — which is the normal case for a + # populated (non-placeholder) baseline — and that would be + # misreported as a parse failure. Plain `jq -r` exits 0 whenever the + # filter evaluates, so a non-zero exit means malformed or mis-shaped JSON. + IS_PLACEHOLDER=$(jq -r '.placeholder == true or (.metrics | length == 0)' "$BASELINE") || { echo "::error::Failed to parse baseline JSON" exit 1 } diff --git a/docker/telemetry/workload/expected_metrics.json b/docker/telemetry/workload/expected_metrics.json index 2758dc32ac..ab417ec226 100644 --- a/docker/telemetry/workload/expected_metrics.json +++ b/docker/telemetry/workload/expected_metrics.json @@ -39,17 +39,13 @@ "xrpld_State_Accounting_Full_duration", "xrpld_Peer_Finder_Active_Inbound_Peers", "xrpld_Peer_Finder_Active_Outbound_Peers", - "xrpld_job_count" + "xrpld_jobq_job_count" ] }, "statsd_counters": { "description": "beast::insight counters exported via OTLP/HTTP. 
The OTel Prometheus exporter appends _total to monotonic counters.", "metrics": ["xrpld_rpc_requests_total", "xrpld_ledger_fetches_total"] }, - "statsd_histograms": { - "description": "beast::insight timers/histograms exported via OTLP/HTTP.", - "metrics": ["xrpld_rpc_time", "xrpld_rpc_size"] - }, "overlay_traffic": { "description": "Overlay traffic metrics (subset — full list has 45+ categories).", "metrics": [ diff --git a/docker/telemetry/workload/expected_spans.json b/docker/telemetry/workload/expected_spans.json index c8f3cc8246..1b6530e583 100644 --- a/docker/telemetry/workload/expected_spans.json +++ b/docker/telemetry/workload/expected_spans.json @@ -44,9 +44,9 @@ "name": "tx.receive", "category": "transaction", "parent": null, - "required_attributes": ["tx_hash", "peer_id", "suppressed", "tx_status"], + "required_attributes": ["tx_hash", "peer_id", "suppressed"], "config_flag": "trace_transactions", - "note": "Cross-node span: parent context propagated from the sender's tx.process via protobuf. Also carries tx_type and peer_version." + "note": "Cross-node span: parent context propagated from the sender's tx.process via protobuf. Also carries tx_type and peer_version. tx_status is only set when a tx is suppressed/known-bad, so it is not a required attribute on every tx.receive." }, { "name": "tx.apply", @@ -352,14 +352,18 @@ "category": "pathfind", "parent": "pathfind.request", "required_attributes": ["pathfind_fast"], - "config_flag": "trace_rpc" + "config_flag": "trace_rpc", + "optional": true, + "note": "Only fires when PathRequest::doUpdate runs a computation; the self-to-self XRP probe from the load generator returns early without computing paths in a fresh cluster with no liquidity." 
}, { "name": "pathfind.discover", "category": "pathfind", "parent": "pathfind.compute", "required_attributes": ["pathfind_search_level", "pathfind_num_paths"], - "config_flag": "trace_rpc" + "config_flag": "trace_rpc", + "optional": true, + "note": "Graph exploration; only fires under pathfind.compute, which needs real path liquidity not present in the fresh test cluster." }, { "name": "pathfind.update_all", @@ -411,7 +415,9 @@ { "parent": "pathfind.request", "child": "pathfind.compute", - "description": "Pathfind request contains the compute sub-span" + "description": "Pathfind request contains the compute sub-span", + "skip": true, + "skip_reason": "pathfind.compute only fires when a path computation actually runs; the self-to-self XRP probe in a fresh cluster with no liquidity returns before computing, so the child is not emitted under the harness workload." } ], "total_span_types": 40,