refactor(telemetry): simplify ledger/peer attr naming on phase-6, update dashboards

- Add canonical ledgerHash (xrpl.ledger.hash) to SpanNames.h.
- LedgerSpanNames: reuse shared canonicals (ledgerSeq, closeTime,
  closeTimeCorrect, closeResolutionMs, ledgerHash); bare names for
  tx_count, tx_failed, validations.
- PeerSpanNames: reuse shared canonicals (peerId, ledgerHash); bare
  names for proposal_trusted, validation_full, validation_trusted.
- Update call sites in BuildLedger.cpp, LedgerMaster.cpp, PeerImp.cpp.
- Update 5 Grafana dashboards: strip xrpl.<domain>. prefix from
  per-span attr refs in PromQL/TraceQL queries. Keep rule-5 entries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-05-13 16:16:30 +01:00
parent e60efd4d2f
commit 9e27120a15
11 changed files with 74 additions and 87 deletions

View File

@@ -10,7 +10,7 @@
"panels": [
{
"title": "Consensus Round Duration",
"description": "p95 and p50 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp:395) measures the time to process an accepted ledger including transaction application and state finalization. The span carries xrpl.consensus.proposers and xrpl.consensus.round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
"description": "p95 and p50 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp:395) measures the time to process an accepted ledger including transaction application and state finalization. The span carries proposers and round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -95,7 +95,7 @@
},
{
"title": "Ledger Close Duration",
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp:282) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.consensus.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp:282) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -134,7 +134,7 @@
},
{
"title": "Validation Send Rate",
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp:753). Each validation confirms the node has fully validated a ledger. The span carries xrpl.consensus.ledger.seq and xrpl.consensus.proposing. Should closely track the ledger close rate when the node is healthy.",
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp:753). Each validation confirms the node has fully validated a ledger. The span carries xrpl.ledger.seq and proposing. Should closely track the ledger close rate when the node is healthy.",
"type": "stat",
"gridPos": {
"h": 8,
@@ -206,7 +206,7 @@
},
{
"title": "Close Time Agreement",
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on xrpl.consensus.close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -219,8 +219,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_consensus_close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
"legendFormat": "Close Time Correct={{xrpl_consensus_close_time_correct}} [{{exported_instance}}]"
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
"legendFormat": "Close Time Correct={{close_time_correct}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -400,7 +400,7 @@
},
{
"title": "Close Time: Raw Proposals (Per Node)",
"description": "Each node's raw proposed close time (xrpl.consensus.close_time_self) \u2014 the unrounded wall clock value at the moment the node closed its ledger. Compare across nodes to see clock drift.",
"description": "Each node's raw proposed close time (close_time_self) \u2014 the unrounded wall clock value at the moment the node closed its ledger. Compare across nodes to see clock drift.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -436,14 +436,14 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time_self)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time_self)",
"refId": "A"
}
]
},
{
"title": "Close Time: Effective / Quantized",
"description": "The consensus-agreed close time after rounding to the current resolution bin (xrpl.consensus.close_time). This is the value written to the ledger header. All nodes in agreement produce the same value.",
"description": "The consensus-agreed close time after rounding to the current resolution bin (close_time). This is the value written to the ledger header. All nodes in agreement produce the same value.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -479,14 +479,14 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time)",
"refId": "A"
}
]
},
{
"title": "Close Time Vote Bins & Resolution",
"description": "Number of distinct close time vote bins (xrpl.consensus.close_time_vote_bins) and the bin size / resolution in ms (xrpl.consensus.close_resolution_ms). More bins = more clock disagreement. Resolution adapts: finer (10s) when validators agree, coarser (120s) when they disagree.",
"description": "Number of distinct close time vote bins (close_time_vote_bins) and the bin size / resolution in ms (close_resolution_ms). More bins = more clock disagreement. Resolution adapts: finer (10s) when validators agree, coarser (120s) when they disagree.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -555,7 +555,7 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time_vote_bins)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time_vote_bins)",
"refId": "A"
},
{
@@ -563,14 +563,14 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_resolution_ms)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_resolution_ms)",
"refId": "B"
}
]
},
{
"title": "Close Time Resolution Direction",
"description": "Whether close time resolution increased (coarser bins, more disagreement), decreased (finer bins, better agreement), or stayed unchanged relative to the previous ledger. Based on xrpl.consensus.resolution_direction attribute.",
"description": "Whether close time resolution increased (coarser bins, more disagreement), decreased (finer bins, better agreement), or stayed unchanged relative to the previous ledger. Based on resolution_direction attribute.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -606,7 +606,7 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\" && span.xrpl.consensus.resolution_direction=~\"$resolution_direction\"} | select(span.xrpl.consensus.resolution_direction)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\" && span.resolution_direction=~\"$resolution_direction\"} | select(span.resolution_direction)",
"refId": "A"
}
]
@@ -650,7 +650,7 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time, span.xrpl.consensus.close_time_vote_bins)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time, span.close_time_vote_bins)",
"refId": "A"
}
]

View File

@@ -88,7 +88,7 @@
},
{
"title": "Ledger Validation Rate",
"description": "Rate at which ledgers pass the validation threshold and are accepted as fully validated. The ledger.validate span (LedgerMaster.cpp:915) fires in checkAccept() only after the ledger receives sufficient trusted validations (>= quorum). Records xrpl.ledger.seq and xrpl.ledger.validations (the number of validations received).",
"description": "Rate at which ledgers pass the validation threshold and are accepted as fully validated. The ledger.validate span (LedgerMaster.cpp:915) fires in checkAccept() only after the ledger receives sufficient trusted validations (>= quorum). Records xrpl.ledger.seq and validations (the number of validations received).",
"type": "stat",
"gridPos": {
"h": 8,
@@ -156,7 +156,7 @@
},
{
"title": "Transaction Apply Duration",
"description": "p95 and p50 duration of applying the consensus transaction set during ledger building. The tx.apply span (BuildLedger.cpp:88) wraps applyTransactions() which iterates through the CanonicalTXSet with multiple retry passes. Records xrpl.ledger.tx_count (successful) and xrpl.ledger.tx_failed (failed) as attributes.",
"description": "p95 and p50 duration of applying the consensus transaction set during ledger building. The tx.apply span (BuildLedger.cpp:88) wraps applyTransactions() which iterates through the CanonicalTXSet with multiple retry passes. Records tx_count (successful) and tx_failed (failed) as attributes.",
"type": "timeseries",
"gridPos": {
"h": 8,

View File

@@ -11,7 +11,7 @@
"panels": [
{
"title": "Peer Proposal Receive Rate",
"description": "Rate of consensus proposals received from network peers. The peer.proposal.receive span (PeerImp.cpp:1667) fires in onMessage(TMProposeSet) for each incoming proposal. Records xrpl.peer.id (sending peer) and xrpl.peer.proposal.trusted (whether the proposer is in our UNL). Requires trace_peer=1 in the telemetry config.",
"description": "Rate of consensus proposals received from network peers. The peer.proposal.receive span (PeerImp.cpp:1667) fires in onMessage(TMProposeSet) for each incoming proposal. Records xrpl.peer.id (sending peer) and proposal_trusted (whether the proposer is in our UNL). Requires trace_peer=1 in the telemetry config.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -50,7 +50,7 @@
},
{
"title": "Peer Validation Receive Rate",
"description": "Rate of ledger validations received from network peers. The peer.validation.receive span (PeerImp.cpp:2264) fires in onMessage(TMValidation) for each incoming validation message. Records xrpl.peer.id (sending peer) and xrpl.peer.validation.trusted (whether the validator is trusted). Requires trace_peer=1 in the telemetry config.",
"description": "Rate of ledger validations received from network peers. The peer.validation.receive span (PeerImp.cpp:2264) fires in onMessage(TMValidation) for each incoming validation message. Records xrpl.peer.id (sending peer) and validation_trusted (whether the validator is trusted). Requires trace_peer=1 in the telemetry config.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -89,7 +89,7 @@
},
{
"title": "Proposals Trusted vs Untrusted",
"description": "Pie chart showing the ratio of proposals received from trusted validators (in our UNL) vs untrusted validators. Grouped by the xrpl.peer.proposal.trusted span attribute (true/false). A healthy node connected to a well-configured UNL should see a significant portion of trusted proposals. Note: proposals that fail early validation may not have the trusted attribute set.",
"description": "Pie chart showing the ratio of proposals received from trusted validators (in our UNL) vs untrusted validators. Grouped by the proposal_trusted span attribute (true/false). A healthy node connected to a well-configured UNL should see a significant portion of trusted proposals. Note: proposals that fail early validation may not have the trusted attribute set.",
"type": "piechart",
"gridPos": {
"h": 8,
@@ -108,8 +108,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_peer_proposal_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_peer_proposal_trusted=~\"$proposal_trusted\", span_name=\"peer.proposal.receive\"}[5m]))",
"legendFormat": "Trusted = {{xrpl_peer_proposal_trusted}} [{{exported_instance}}]"
"expr": "sum by (proposal_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", proposal_trusted=~\"$proposal_trusted\", span_name=\"peer.proposal.receive\"}[5m]))",
"legendFormat": "Trusted = {{proposal_trusted}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -121,7 +121,7 @@
},
{
"title": "Validations Trusted vs Untrusted",
"description": "Pie chart showing the ratio of validations received from trusted validators (in our UNL) vs untrusted validators. Grouped by the xrpl.peer.validation.trusted span attribute (true/false). Monitoring this helps detect if the node is receiving validations from the expected set of trusted validators. Note: validations that fail early checks may not have the trusted attribute set.",
"description": "Pie chart showing the ratio of validations received from trusted validators (in our UNL) vs untrusted validators. Grouped by the validation_trusted span attribute (true/false). Monitoring this helps detect if the node is receiving validations from the expected set of trusted validators. Note: validations that fail early checks may not have the trusted attribute set.",
"type": "piechart",
"gridPos": {
"h": 8,
@@ -140,8 +140,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_peer_validation_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_peer_validation_trusted=~\"$validation_trusted\", span_name=\"peer.validation.receive\"}[5m]))",
"legendFormat": "Trusted = {{xrpl_peer_validation_trusted}} [{{exported_instance}}]"
"expr": "sum by (validation_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", validation_trusted=~\"$validation_trusted\", span_name=\"peer.validation.receive\"}[5m]))",
"legendFormat": "Trusted = {{validation_trusted}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -181,7 +181,7 @@
"label": "Proposal Trusted",
"description": "Filter by proposal trust status (true = from trusted validator)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.proposal.receive\"}, xrpl_peer_proposal_trusted)",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.proposal.receive\"}, proposal_trusted)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"
@@ -201,7 +201,7 @@
"label": "Validation Trusted",
"description": "Filter by validation trust status (true = from trusted validator)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.validation.receive\"}, xrpl_peer_validation_trusted)",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.validation.receive\"}, validation_trusted)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"

View File

@@ -10,7 +10,7 @@
"panels": [
{
"title": "RPC Request Rate by Command",
"description": "Per-second rate of RPC command executions, broken down by command name (e.g. server_info, submit). Calculated as rate(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}) over a 5m window, grouped by the xrpl.rpc.command span attribute.",
"description": "Per-second rate of RPC command executions, broken down by command name (e.g. server_info, submit). Calculated as rate(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}) over a 5m window, grouped by the command span attribute.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -29,8 +29,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m]))",
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m]))",
"legendFormat": "{{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -49,7 +49,7 @@
},
{
"title": "RPC Latency P95 by Command",
"description": "95th percentile response time for each RPC command. Computed from the spanmetrics duration histogram using histogram_quantile(0.95) over rpc.command.* spans, grouped by xrpl.rpc.command. High values indicate slow commands that may need optimization.",
"description": "95th percentile response time for each RPC command. Computed from the spanmetrics duration histogram using histogram_quantile(0.95) over rpc.command.* spans, grouped by command. High values indicate slow commands that may need optimization.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -68,8 +68,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "histogram_quantile(0.95, sum by (le, xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "P95 {{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "histogram_quantile(0.95, sum by (le, command, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "P95 {{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -107,8 +107,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) * 100",
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) * 100",
"legendFormat": "{{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -158,7 +158,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) by (le)",
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) by (le)",
"legendFormat": "{{le}}",
"format": "heatmap"
}
@@ -185,14 +185,14 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.request\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.request\"}[5m]))",
"legendFormat": "rpc.request / Sec [{{exported_instance}}]"
},
{
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.process\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.process\"}[5m]))",
"legendFormat": "rpc.process / Sec [{{exported_instance}}]"
}
],
@@ -231,14 +231,14 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_UNSET\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_UNSET\"}[5m]))",
"legendFormat": "Success [{{exported_instance}}]"
},
{
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m]))",
"legendFormat": "Error [{{exported_instance}}]"
}
],
@@ -277,8 +277,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "topk(10, sum by (xrpl_rpc_command, exported_instance) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "topk(10, sum by (command, exported_instance) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "{{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -309,7 +309,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.ws_message\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.ws_message\"}[5m]))",
"legendFormat": "WS Messages / Sec [{{exported_instance}}]"
}
],
@@ -350,7 +350,7 @@
"label": "RPC Command",
"description": "Filter by RPC command name (e.g., server_info, submit)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}, xrpl_rpc_command)",
"query": "label_values(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}, command)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"

View File

@@ -102,7 +102,7 @@
},
{
"title": "Transaction Path Distribution",
"description": "Breakdown of transactions by origin path. The xrpl.tx.local attribute indicates whether the transaction was submitted locally (true) or received from a peer (false). Helps understand the ratio of locally-originated vs relayed transactions.",
"description": "Breakdown of transactions by origin path. The local attribute indicates whether the transaction was submitted locally (true) or received from a peer (false). Helps understand the ratio of locally-originated vs relayed transactions.",
"type": "piechart",
"gridPos": {
"h": 8,
@@ -121,8 +121,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_tx_local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_tx_local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
"legendFormat": "Local = {{xrpl_tx_local}} [{{exported_instance}}]"
"expr": "sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
"legendFormat": "Local = {{local}} [{{exported_instance}}]"
}
]
},
@@ -282,7 +282,7 @@
},
{
"title": "Transaction Apply Failed Rate",
"description": "Rate of tx.apply spans completing with error status, indicating transaction application failures during ledger building. The span records xrpl.ledger.tx_failed as an attribute. Thresholds: green < 0.1/sec, yellow 0.1-1/sec, red > 1/sec. Some failures are normal (e.g. conflicting offers) but sustained high rates may indicate issues.",
"description": "Rate of tx.apply spans completing with error status, indicating transaction application failures during ledger building. The span records tx_failed as an attribute. Thresholds: green < 0.1/sec, yellow 0.1-1/sec, red > 1/sec. Some failures are normal (e.g. conflicting offers) but sustained high rates may indicate issues.",
"type": "stat",
"gridPos": {
"h": 8,
@@ -358,7 +358,7 @@
"label": "TX Origin",
"description": "Filter by transaction origin (true = local submit, false = peer relay)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\"}, xrpl_tx_local)",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\"}, local)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"

View File

@@ -122,6 +122,7 @@ inline constexpr auto ledgerSeq = join(join(seg::xrpl, seg::ledger), makeStr("se
inline constexpr auto closeTime = makeStr("close_time");
inline constexpr auto closeTimeCorrect = makeStr("close_time_correct");
inline constexpr auto closeResolutionMs = makeStr("close_resolution_ms");
inline constexpr auto ledgerHash = join(join(seg::xrpl, seg::ledger), makeStr("hash"));
} // namespace attr
// ===== Shared attribute values =============================================

View File

@@ -82,7 +82,7 @@ buildLedgerImpl(
built->header().seq < XRP_LEDGER_EARLIEST_FEES || built->read(keylet::fees()),
"xrpl::buildLedgerImpl : valid ledger fees");
built->setAccepted(closeTime, closeResolution, closeTimeCorrect);
buildSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(built->header().seq));
buildSpan.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(built->header().seq));
buildSpan.setAttribute(
ledger_span::attr::closeTime, static_cast<int64_t>(closeTime.time_since_epoch().count()));
buildSpan.setAttribute(ledger_span::attr::closeTimeCorrect, closeTimeCorrect);

View File

@@ -454,7 +454,7 @@ LedgerMaster::storeLedger(std::shared_ptr<Ledger const> ledger)
{
using namespace telemetry;
auto span = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::store);
span.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
span.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(ledger->header().seq));
bool const validated = ledger->header().validated;
// Returns true if we already had the ledger
@@ -974,7 +974,7 @@ LedgerMaster::checkAccept(std::shared_ptr<Ledger const> const& ledger)
using namespace telemetry;
auto valSpan = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::validate);
valSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
valSpan.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(ledger->header().seq));
valSpan.setAttribute(ledger_span::attr::validations, static_cast<int64_t>(tvc));
JLOG(m_journal.info()) << "Advancing accepted ledger to " << ledger->header().seq

View File

@@ -29,22 +29,17 @@ inline constexpr auto apply = makeStr("apply");
// ===== Attribute keys ========================================================
namespace attr {
inline constexpr auto xrplLedger = join(seg::xrpl, seg::ledger);
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::closeResolutionMs;
using ::xrpl::telemetry::attr::closeTime;
using ::xrpl::telemetry::attr::closeTimeCorrect;
using ::xrpl::telemetry::attr::ledgerHash;
using ::xrpl::telemetry::attr::ledgerSeq;
/// "xrpl.ledger.seq"
inline constexpr auto seq = join(xrplLedger, makeStr("seq"));
/// "xrpl.ledger.close_time"
inline constexpr auto closeTime = join(xrplLedger, makeStr("close_time"));
/// "xrpl.ledger.close_time_correct"
inline constexpr auto closeTimeCorrect = join(xrplLedger, makeStr("close_time_correct"));
/// "xrpl.ledger.close_resolution_ms"
inline constexpr auto closeResolutionMs = join(xrplLedger, makeStr("close_resolution_ms"));
/// "xrpl.ledger.tx_count"
inline constexpr auto txCount = join(xrplLedger, makeStr("tx_count"));
/// "xrpl.ledger.tx_failed"
inline constexpr auto txFailed = join(xrplLedger, makeStr("tx_failed"));
/// "xrpl.ledger.validations"
inline constexpr auto validations = join(xrplLedger, makeStr("validations"));
/// Domain-owned bare attrs.
inline constexpr auto txCount = makeStr("tx_count");
inline constexpr auto txFailed = makeStr("tx_failed");
inline constexpr auto validations = makeStr("validations");
} // namespace attr
} // namespace xrpl::telemetry::ledger_span

View File

@@ -1875,7 +1875,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
{
using namespace telemetry;
auto span = SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::proposalReceive);
span.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
span.setAttribute(peer_span::attr::peerId, static_cast<int64_t>(id_));
protocol::TMProposeSet const& set = *m;
@@ -2484,7 +2484,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
using namespace telemetry;
auto valSpan =
SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::validationReceive);
valSpan.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
valSpan.setAttribute(peer_span::attr::peerId, static_cast<int64_t>(id_));
if (m->validation().size() < 50)
{
@@ -2508,8 +2508,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
false);
val->setSeen(closeTime);
}
valSpan.setAttribute(
peer_span::attr::validationLedgerHash, to_string(val->getLedgerHash()).c_str());
valSpan.setAttribute(peer_span::attr::ledgerHash, to_string(val->getLedgerHash()).c_str());
valSpan.setAttribute(peer_span::attr::validationFull, val->isFull());
if (!isCurrent(

View File

@@ -25,22 +25,14 @@ inline constexpr auto validationReceive = makeStr("validation.receive");
// ===== Attribute keys ========================================================
namespace attr {
inline constexpr auto xrplPeer = join(seg::xrpl, seg::peer);
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::ledgerHash;
using ::xrpl::telemetry::attr::peerId;
/// "xrpl.peer.id"
inline constexpr auto id = join(xrplPeer, makeStr("id"));
/// "xrpl.peer.proposal.trusted"
inline constexpr auto proposalTrusted =
join(join(xrplPeer, makeStr("proposal")), makeStr("trusted"));
/// "xrpl.peer.validation.ledger_hash"
inline constexpr auto validationLedgerHash =
join(join(xrplPeer, makeStr("validation")), makeStr("ledger_hash"));
/// "xrpl.peer.validation.full"
inline constexpr auto validationFull = join(join(xrplPeer, makeStr("validation")), makeStr("full"));
/// "xrpl.peer.validation.trusted"
inline constexpr auto validationTrusted =
join(join(xrplPeer, makeStr("validation")), makeStr("trusted"));
/// Domain-owned bare attrs.
inline constexpr auto proposalTrusted = makeStr("proposal_trusted");
inline constexpr auto validationFull = makeStr("validation_full");
inline constexpr auto validationTrusted = makeStr("validation_trusted");
} // namespace attr
} // namespace xrpl::telemetry::peer_span