mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-07 02:36:47 +00:00
The Phase 10 validation harness had drifted from the code's recording surface and the telemetry-validation CI job was failing before it could build. CI fix (telemetry-validation.yml): - Replace nonexistent local action ./.github/actions/print-env with the remote XRPLF/actions/print-build-env (the build-xrpld job failed in 56s on this). - Sync prepare-runner and upload-artifact action SHAs to the canonical workflow. Recording-surface reconciliation (docker/telemetry/workload/): - Migrate span attributes from dotted xrpl.<domain>.<field> to the bare/underscore form introduced by the 2026-05-13 span-attr naming redesign (tx_hash, peer_id, ledger_seq, consensus_mode, consensus_round, full_validation, quorum, ...). Dotted xrpl.ledger.hash is retained only on peer.validation.receive (shared constant), while consensus.validation.send uses bare ledger_hash. - Fix attribute placement: tx.apply carries tx_count/tx_failed (not ledger_seq); ledger.build carries ledger_seq/close_* (not tx_count/tx_failed). - Replace the phantom rpc.request span with the real WS root rpc.ws_message; drop the never-emitted duration_ms; rebuild the parent-child map accordingly. - Add the new spans the code emits: apply-pipeline stage spans (tx.preflight/preclaim/transactor with stage/tx_type/ter_result), txq.*, consensus sub-spans (round/establish/update_positions/check/phase.open), ledger.acquire, grpc.*, pathfind.*. Conditional spans are marked optional so they are skipped (not failed) when the workload does not exercise them. - validate_telemetry.py: service.name and Loki job label rippled -> xrpld; fix PARITY_SPAN_ATTRS (rename the 4 real attrs, drop the 3 that are metrics not span attrs); add optional-span handling that skips missing optional spans while still validating attributes when present. - expected_metrics.json: rippled_ -> xrpld_ on all beast::insight/overlay metrics, xrpld_job_count, the 15 on-disk xrpld-* dashboard UIDs, and the real bare spanmetrics dimension labels. 
- regression-metrics.json + baseline-timings.json: rpc.request -> rpc.ws_message. Metrics pipeline fix: - Switch node [insight] config from server=statsd/prefix=rippled to server=otel + /v1/metrics endpoint + prefix=xrpld across run-full-validation.sh, xrpld-validator.cfg.template, benchmark.sh and the workload compose. The collector has no StatsD receiver, so system metrics only reach Prometheus over OTLP. Synthetic load for new spans: - Add ripple_path_find to the RPC load generator (drives pathfind.* spans). - Add a high-TPS txq-burst workload phase to force fee escalation (drives txq.*). All facts verified against the *SpanNames.h headers and a live xrpld node + collector (Tempo service.name=xrpld, tx.preflight attrs [stage,ter_result,tx_type], 279 xrpld_ Prometheus metrics and zero rippled_). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
98 lines
2.1 KiB
Plaintext
98 lines
2.1 KiB
Plaintext
# xrpld validator node configuration template for workload harness.
|
|
#
|
|
# Placeholders (replaced by docker-compose entrypoint):
|
|
# {{NODE_INDEX}} — Node number (1-based)
|
|
# {{RPC_PORT}} — HTTP RPC port
|
|
# {{WS_PORT}} — WebSocket port
|
|
# {{PEER_PORT}} — Peer protocol port
|
|
# {{DATA_DIR}} — Node data directory
|
|
# {{VALIDATION_SEED}} — Validator seed from key generation
|
|
# {{VALIDATORS_FILE}} — Path to shared validators.txt
|
|
# {{IPS_FIXED}} — Peer addresses (one per line)
|
|
# {{OTEL_ENDPOINT}} — OTel Collector OTLP/HTTP traces endpoint
|
|
# {{OTEL_METRICS_ENDPOINT}} — OTel Collector OTLP/HTTP metrics endpoint
|
|
# {{LOG_LEVEL}} — Log level (debug, info, warning, error)
|
|
|
|
# Listener list: each entry names a [port_*] section defined further down
# in this template (port_rpc, port_ws, port_peer).
[server]
|
|
port_rpc
|
|
port_ws
|
|
port_peer
|
|
|
|
# HTTP RPC listener on {{RPC_PORT}}, bound to every interface (ip = 0.0.0.0).
# NOTE(review): admin = 0.0.0.0 presumably grants admin commands to any
# caller; acceptable only on the isolated harness network. Confirm before
# reusing this template anywhere reachable from outside.
[port_rpc]
|
|
port = {{RPC_PORT}}
|
|
ip = 0.0.0.0
|
|
admin = 0.0.0.0
|
|
protocol = http
|
|
|
|
# WebSocket listener on {{WS_PORT}}, bound to every interface (ip = 0.0.0.0).
# NOTE(review): admin = 0.0.0.0 presumably grants admin commands to any
# caller; acceptable only on the isolated harness network. Confirm before
# reusing this template anywhere reachable from outside.
[port_ws]
|
|
port = {{WS_PORT}}
|
|
ip = 0.0.0.0
|
|
admin = 0.0.0.0
|
|
protocol = ws
|
|
|
|
# Peer protocol listener on {{PEER_PORT}}, bound to every interface.
# Unlike the RPC and WebSocket ports, this section sets no admin key.
[port_peer]
|
|
port = {{PEER_PORT}}
|
|
ip = 0.0.0.0
|
|
protocol = peer
|
|
|
|
# Node database: NuDB backend stored under the per-node {{DATA_DIR}}/nudb.
# NOTE(review): online_delete=256 presumably limits retained history to
# roughly the last 256 ledgers so harness disk usage stays bounded. Confirm.
[node_db]
|
|
type=NuDB
|
|
path={{DATA_DIR}}/nudb
|
|
online_delete=256
|
|
|
|
# Database directory, kept inside the per-node {{DATA_DIR}}.
[database_path]
|
|
{{DATA_DIR}}/db
|
|
|
|
# Debug log file path inside the per-node {{DATA_DIR}}.
[debug_logfile]
|
|
{{DATA_DIR}}/debug.log
|
|
|
|
# Validator seed from key generation, substituted for {{VALIDATION_SEED}}.
# The rendered config therefore contains key material; treat the generated
# file as a secret and keep it out of version control.
[validation_seed]
|
|
{{VALIDATION_SEED}}
|
|
|
|
# Path to the shared validators.txt, substituted for {{VALIDATORS_FILE}}.
[validators_file]
|
|
{{VALIDATORS_FILE}}
|
|
|
|
# Peer addresses, one per line, expanded from {{IPS_FIXED}}.
[ips_fixed]
|
|
{{IPS_FIXED}}
|
|
|
|
# NOTE(review): 1 presumably marks the node as private, so it peers only
# with the addresses listed in [ips_fixed]. Confirm against the xrpld
# configuration reference.
[peer_private]
|
|
1
|
|
|
|
# --- OpenTelemetry tracing (all categories enabled) ---
# Every trace_* switch below is set to 1. Spans are exported with
# exporter=otlp_http to {{OTEL_ENDPOINT}} (the OTLP/HTTP traces endpoint), and
# service_instance_id tags each node as validator-{{NODE_INDEX}}.
# NOTE(review): sampling_ratio=1.0 presumably means every trace is sampled, and
# batch_size / batch_delay_ms / max_queue_size presumably tune the span batch
# exporter. Confirm semantics and limits against the telemetry documentation.
|
|
[telemetry]
|
|
enabled=1
|
|
service_instance_id=validator-{{NODE_INDEX}}
|
|
endpoint={{OTEL_ENDPOINT}}
|
|
exporter=otlp_http
|
|
sampling_ratio=1.0
|
|
batch_size=512
|
|
batch_delay_ms=2000
|
|
max_queue_size=2048
|
|
trace_rpc=1
|
|
trace_transactions=1
|
|
trace_consensus=1
|
|
trace_peer=1
|
|
trace_ledger=1
|
|
|
|
# --- Native OTel metrics (beast::insight over OTLP/HTTP) ---
|
|
# The collector has no StatsD receiver (metrics pipeline is [otlp, spanmetrics]),
|
|
# so beast::insight exports natively over OTLP. prefix=xrpld matches the OTel
|
|
# resource service name and the xrpld_* names the dashboards query.
|
|
# endpoint is filled from {{OTEL_METRICS_ENDPOINT}} (the OTLP/HTTP metrics
# endpoint), a separate placeholder from the {{OTEL_ENDPOINT}} that the
# [telemetry] section uses for traces.
[insight]
|
|
server=otel
|
|
endpoint={{OTEL_METRICS_ENDPOINT}}
|
|
prefix=xrpld
|
|
|
|
# Sets the log severity to {{LOG_LEVEL}} through the log_level command.
# NOTE(review): presumably executed once when the node starts. Confirm.
[rpc_startup]
|
|
{ "command": "log_level", "severity": "{{LOG_LEVEL}}" }
|
|
|
|
# NOTE(review): 0 presumably turns off TLS certificate verification for
# outbound connections; reasonable for a local harness only. Confirm before
# reusing this template outside the test cluster.
[ssl_verify]
|
|
0
|
|
|
|
# --- Network tuning for local cluster ---
# NOTE(review): network_id 0 is presumably the main-network identifier. The
# cluster appears to rely on [ips_fixed] and [peer_private] for isolation;
# confirm whether a dedicated test network ID would be safer.
|
|
[network_id]
|
|
0
|
|
|
|
# SNTP time source for the node: a single server, time.google.com.
[sntp_servers]
|
|
time.google.com
|