mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-07 02:36:47 +00:00
The print-env CI fix let the Telemetry Stack Validation job build and run the workload harness end-to-end for the first time. It reported 129/136 checks passing; this commit fixes the 7 real failures plus a latent regression-gate bug.

Validation-suite fixes (verified against the CI run's actual emission and a live node):
- expected_metrics.json: the beast::insight job-depth gauge is `xrpld_jobq_job_count`, not `xrpld_job_count` (the latter is a Phase 9 OTel counter). Reverted the prior rename. Removed the statsd_histograms block (`xrpld_rpc_time`/`xrpld_rpc_size`): these RPC timers do not emit under the WS workload (0 series in CI).
- expected_spans.json: `tx_status` is only set on suppressed/known-bad receives, so it is no longer a required attribute of every `tx.receive`. Marked `pathfind.compute` and `pathfind.discover` optional and the `pathfind.request -> pathfind.compute` hierarchy as skip — the self-to-self XRP probe returns before computing paths in a fresh cluster with no liquidity, so only `pathfind.request` fires.

Regression-gate bug (telemetry-validation.yml "Print regression summary"):
- `jq -e` exits non-zero when its filter result is boolean false — the normal case for a populated (non-placeholder) baseline — which was misreported as "Failed to parse baseline JSON" and failed the job. Dropped `-e` (kept `-r`) so a non-zero exit genuinely means malformed JSON.

The optional-span handling and regression comparison both worked correctly in the CI run (txq.* / pathfind.update_all skipped-when-absent, 0 regressions detected).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
147 lines
5.4 KiB
JSON
147 lines
5.4 KiB
JSON
{
|
|
"description": "Expected metric inventory for xrpld telemetry validation. Metrics emitted by xrpld use the xrpld_ prefix (the [insight] prefix and OTel resource service name); spanmetrics-derived series use the traces_span_metrics_ prefix instead. Sourced from the live Grafana dashboards and MetricsRegistry.cpp.",
|
|
"spanmetrics": {
|
|
"description": "SpanMetrics-derived RED metrics from the OTel Collector spanmetrics connector.",
|
|
"metrics": [
|
|
"traces_span_metrics_calls_total",
|
|
"traces_span_metrics_duration_milliseconds_bucket",
|
|
"traces_span_metrics_duration_milliseconds_count",
|
|
"traces_span_metrics_duration_milliseconds_sum"
|
|
],
|
|
"required_labels": [
|
|
"span_name",
|
|
"status_code",
|
|
"service_name",
|
|
"span_kind"
|
|
],
|
|
"dimension_labels": [
|
|
"command",
|
|
"rpc_status",
|
|
"consensus_mode",
|
|
"local",
|
|
"proposal_trusted",
|
|
"validation_trusted",
|
|
"tx_type",
|
|
"ter_result",
|
|
"stage",
|
|
"txq_status",
|
|
"close_time_correct",
|
|
"consensus_state",
|
|
"suppressed"
|
|
],
|
|
"_dimension_labels_note": "Bare label names as configured in otel-collector-config.yaml spanmetrics dimensions. Informational only (not asserted by the validator)."
|
|
},
|
|
"statsd_gauges": {
|
|
"description": "beast::insight gauges exported via OTLP/HTTP to the collector (server=otel).",
|
|
"metrics": [
|
|
"xrpld_LedgerMaster_Validated_Ledger_Age",
|
|
"xrpld_LedgerMaster_Published_Ledger_Age",
|
|
"xrpld_State_Accounting_Full_duration",
|
|
"xrpld_Peer_Finder_Active_Inbound_Peers",
|
|
"xrpld_Peer_Finder_Active_Outbound_Peers",
|
|
"xrpld_jobq_job_count"
|
|
]
|
|
},
|
|
"statsd_counters": {
|
|
"description": "beast::insight counters exported via OTLP/HTTP. The OTel Prometheus exporter appends _total to monotonic counters.",
|
|
"metrics": ["xrpld_rpc_requests_total", "xrpld_ledger_fetches_total"]
|
|
},
|
|
"overlay_traffic": {
|
|
"description": "Overlay traffic metrics (subset — full list has 45+ categories).",
|
|
"metrics": [
|
|
"xrpld_total_Bytes_In",
|
|
"xrpld_total_Bytes_Out",
|
|
"xrpld_total_Messages_In",
|
|
"xrpld_total_Messages_Out"
|
|
]
|
|
},
|
|
"phase9_nodestore": {
|
|
"description": "Phase 9 NodeStore I/O observable gauge (MetricsRegistry via OTLP). Single metric with 'metric' label distinguishing sub-metrics.",
|
|
"metrics": ["xrpld_nodestore_state"]
|
|
},
|
|
"phase9_cache": {
|
|
"description": "Phase 9 cache hit rate observable gauge (MetricsRegistry via OTLP). Single metric with 'metric' label.",
|
|
"metrics": ["xrpld_cache_metrics"]
|
|
},
|
|
"phase9_txq": {
|
|
"description": "Phase 9 transaction queue observable gauge (MetricsRegistry via OTLP). Single metric with 'metric' label.",
|
|
"metrics": ["xrpld_txq_metrics"]
|
|
},
|
|
"phase9_rpc_method": {
|
|
"description": "Phase 9 per-RPC-method counters (MetricsRegistry via OTLP).",
|
|
"metrics": ["xrpld_rpc_method_started_total"]
|
|
},
|
|
"phase9_objects": {
|
|
"description": "Phase 9 counted object instances observable gauge (MetricsRegistry via OTLP).",
|
|
"metrics": ["xrpld_object_count"]
|
|
},
|
|
"phase9_load": {
|
|
"description": "Phase 9 fee escalation and load factor observable gauge (MetricsRegistry via OTLP).",
|
|
"metrics": ["xrpld_load_factor_metrics"]
|
|
},
|
|
"parity_validation_agreement": {
|
|
"description": "External dashboard parity: validation agreement percentages (MetricsRegistry).",
|
|
"metrics": [
|
|
"xrpld_validation_agreement{metric=\"agreement_pct_1h\"}",
|
|
"xrpld_validation_agreement{metric=\"agreement_pct_24h\"}"
|
|
]
|
|
},
|
|
"parity_validator_health": {
|
|
"description": "External dashboard parity: validator health indicators (MetricsRegistry).",
|
|
"metrics": [
|
|
"xrpld_validator_health{metric=\"amendment_blocked\"}",
|
|
"xrpld_validator_health{metric=\"unl_expiry_days\"}"
|
|
]
|
|
},
|
|
"parity_peer_quality": {
|
|
"description": "External dashboard parity: peer quality metrics (MetricsRegistry).",
|
|
"metrics": [
|
|
"xrpld_peer_quality{metric=\"peer_latency_p90_ms\"}",
|
|
"xrpld_peer_quality{metric=\"peers_insane_count\"}"
|
|
]
|
|
},
|
|
"parity_ledger_economy": {
|
|
"description": "External dashboard parity: ledger economy metrics (MetricsRegistry).",
|
|
"metrics": [
|
|
"xrpld_ledger_economy{metric=\"base_fee_xrp\"}",
|
|
"xrpld_ledger_economy{metric=\"transaction_rate\"}"
|
|
]
|
|
},
|
|
"parity_state_tracking": {
|
|
"description": "External dashboard parity: server state tracking (MetricsRegistry).",
|
|
"metrics": ["xrpld_state_tracking{metric=\"state_value\"}"]
|
|
},
|
|
"parity_counters": {
|
|
"description": "External dashboard parity: monotonic counters (MetricsRegistry).",
|
|
"metrics": [
|
|
"xrpld_ledgers_closed_total",
|
|
"xrpld_validations_sent_total",
|
|
"xrpld_state_changes_total"
|
|
]
|
|
},
|
|
"parity_storage": {
|
|
"description": "External dashboard parity: storage detail metrics (MetricsRegistry).",
|
|
"metrics": ["xrpld_storage_detail{metric=\"nudb_bytes\"}"]
|
|
},
|
|
"grafana_dashboards": {
|
|
"description": "All Grafana dashboards that must render data (UIDs as provisioned on disk under docker/telemetry/grafana/dashboards/).",
|
|
"uids": [
|
|
"xrpld-rpc-perf",
|
|
"xrpld-rpc-perf-otel",
|
|
"xrpld-transactions",
|
|
"xrpld-consensus",
|
|
"xrpld-ledger-ops",
|
|
"xrpld-peer-net",
|
|
"xrpld-peer-quality",
|
|
"xrpld-fee-market",
|
|
"xrpld-job-queue",
|
|
"xrpld-validator-health",
|
|
"xrpld-system-node-health",
|
|
"xrpld-system-network",
|
|
"xrpld-system-rpc",
|
|
"xrpld-system-overlay-detail",
|
|
"xrpld-system-ledger-sync"
|
|
]
|
|
}
|
|
}
|