mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-02 16:26:48 +00:00
refactor(telemetry): simplify ledger/peer attr naming on phase-6, update dashboards

- Add canonical ledgerHash (xrpl.ledger.hash) to SpanNames.h.
- LedgerSpanNames: reuse shared canonicals (ledgerSeq, closeTime, closeTimeCorrect, closeResolutionMs, ledgerHash); bare names for tx_count, tx_failed, validations.
- PeerSpanNames: reuse shared canonicals (peerId, ledgerHash); bare names for proposal_trusted, validation_full, validation_trusted.
- Update call sites in BuildLedger.cpp, LedgerMaster.cpp, PeerImp.cpp.
- Update 5 Grafana dashboards: strip xrpl.<domain>. prefix from per-span attr refs in PromQL/TraceQL queries. Keep rule-5 entries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
"panels": [
|
||||
{
|
||||
"title": "Consensus Round Duration",
|
||||
"description": "p95 and p50 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp:395) measures the time to process an accepted ledger including transaction application and state finalization. The span carries xrpl.consensus.proposers and xrpl.consensus.round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
|
||||
"description": "p95 and p50 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp:395) measures the time to process an accepted ledger including transaction application and state finalization. The span carries proposers and round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -95,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Ledger Close Duration",
|
||||
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp:282) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.consensus.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
|
||||
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp:282) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -134,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Validation Send Rate",
|
||||
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp:753). Each validation confirms the node has fully validated a ledger. The span carries xrpl.consensus.ledger.seq and xrpl.consensus.proposing. Should closely track the ledger close rate when the node is healthy.",
|
||||
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp:753). Each validation confirms the node has fully validated a ledger. The span carries xrpl.ledger.seq and proposing. Should closely track the ledger close rate when the node is healthy.",
|
||||
"type": "stat",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -206,7 +206,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Close Time Agreement",
|
||||
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on xrpl.consensus.close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
|
||||
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -219,8 +219,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_consensus_close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
|
||||
"legendFormat": "Close Time Correct={{xrpl_consensus_close_time_correct}} [{{exported_instance}}]"
|
||||
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
|
||||
"legendFormat": "Close Time Correct={{close_time_correct}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -400,7 +400,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Close Time: Raw Proposals (Per Node)",
|
||||
"description": "Each node's raw proposed close time (xrpl.consensus.close_time_self) \u2014 the unrounded wall clock value at the moment the node closed its ledger. Compare across nodes to see clock drift.",
|
||||
"description": "Each node's raw proposed close time (close_time_self) \u2014 the unrounded wall clock value at the moment the node closed its ledger. Compare across nodes to see clock drift.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -436,14 +436,14 @@
|
||||
"type": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time_self)",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time_self)",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Close Time: Effective / Quantized",
|
||||
"description": "The consensus-agreed close time after rounding to the current resolution bin (xrpl.consensus.close_time). This is the value written to the ledger header. All nodes in agreement produce the same value.",
|
||||
"description": "The consensus-agreed close time after rounding to the current resolution bin (close_time). This is the value written to the ledger header. All nodes in agreement produce the same value.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -479,14 +479,14 @@
|
||||
"type": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time)",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time)",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Close Time Vote Bins & Resolution",
|
||||
"description": "Number of distinct close time vote bins (xrpl.consensus.close_time_vote_bins) and the bin size / resolution in ms (xrpl.consensus.close_resolution_ms). More bins = more clock disagreement. Resolution adapts: finer (10s) when validators agree, coarser (120s) when they disagree.",
|
||||
"description": "Number of distinct close time vote bins (close_time_vote_bins) and the bin size / resolution in ms (close_resolution_ms). More bins = more clock disagreement. Resolution adapts: finer (10s) when validators agree, coarser (120s) when they disagree.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -555,7 +555,7 @@
|
||||
"type": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time_vote_bins)",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time_vote_bins)",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
@@ -563,14 +563,14 @@
|
||||
"type": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_resolution_ms)",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_resolution_ms)",
|
||||
"refId": "B"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Close Time Resolution Direction",
|
||||
"description": "Whether close time resolution increased (coarser bins, more disagreement), decreased (finer bins, better agreement), or stayed unchanged relative to the previous ledger. Based on xrpl.consensus.resolution_direction attribute.",
|
||||
"description": "Whether close time resolution increased (coarser bins, more disagreement), decreased (finer bins, better agreement), or stayed unchanged relative to the previous ledger. Based on resolution_direction attribute.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -606,7 +606,7 @@
|
||||
"type": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\" && span.xrpl.consensus.resolution_direction=~\"$resolution_direction\"} | select(span.xrpl.consensus.resolution_direction)",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\" && span.resolution_direction=~\"$resolution_direction\"} | select(span.resolution_direction)",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@@ -650,7 +650,7 @@
|
||||
"type": "tempo"
|
||||
},
|
||||
"queryType": "traceql",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time, span.xrpl.consensus.close_time_vote_bins)",
|
||||
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time, span.close_time_vote_bins)",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
|
||||
@@ -88,7 +88,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Ledger Validation Rate",
|
||||
"description": "Rate at which ledgers pass the validation threshold and are accepted as fully validated. The ledger.validate span (LedgerMaster.cpp:915) fires in checkAccept() only after the ledger receives sufficient trusted validations (>= quorum). Records xrpl.ledger.seq and xrpl.ledger.validations (the number of validations received).",
|
||||
"description": "Rate at which ledgers pass the validation threshold and are accepted as fully validated. The ledger.validate span (LedgerMaster.cpp:915) fires in checkAccept() only after the ledger receives sufficient trusted validations (>= quorum). Records xrpl.ledger.seq and validations (the number of validations received).",
|
||||
"type": "stat",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -156,7 +156,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Transaction Apply Duration",
|
||||
"description": "p95 and p50 duration of applying the consensus transaction set during ledger building. The tx.apply span (BuildLedger.cpp:88) wraps applyTransactions() which iterates through the CanonicalTXSet with multiple retry passes. Records xrpl.ledger.tx_count (successful) and xrpl.ledger.tx_failed (failed) as attributes.",
|
||||
"description": "p95 and p50 duration of applying the consensus transaction set during ledger building. The tx.apply span (BuildLedger.cpp:88) wraps applyTransactions() which iterates through the CanonicalTXSet with multiple retry passes. Records tx_count (successful) and tx_failed (failed) as attributes.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"panels": [
|
||||
{
|
||||
"title": "Peer Proposal Receive Rate",
|
||||
"description": "Rate of consensus proposals received from network peers. The peer.proposal.receive span (PeerImp.cpp:1667) fires in onMessage(TMProposeSet) for each incoming proposal. Records xrpl.peer.id (sending peer) and xrpl.peer.proposal.trusted (whether the proposer is in our UNL). Requires trace_peer=1 in the telemetry config.",
|
||||
"description": "Rate of consensus proposals received from network peers. The peer.proposal.receive span (PeerImp.cpp:1667) fires in onMessage(TMProposeSet) for each incoming proposal. Records xrpl.peer.id (sending peer) and proposal_trusted (whether the proposer is in our UNL). Requires trace_peer=1 in the telemetry config.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -50,7 +50,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Peer Validation Receive Rate",
|
||||
"description": "Rate of ledger validations received from network peers. The peer.validation.receive span (PeerImp.cpp:2264) fires in onMessage(TMValidation) for each incoming validation message. Records xrpl.peer.id (sending peer) and xrpl.peer.validation.trusted (whether the validator is trusted). Requires trace_peer=1 in the telemetry config.",
|
||||
"description": "Rate of ledger validations received from network peers. The peer.validation.receive span (PeerImp.cpp:2264) fires in onMessage(TMValidation) for each incoming validation message. Records xrpl.peer.id (sending peer) and validation_trusted (whether the validator is trusted). Requires trace_peer=1 in the telemetry config.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -89,7 +89,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Proposals Trusted vs Untrusted",
|
||||
"description": "Pie chart showing the ratio of proposals received from trusted validators (in our UNL) vs untrusted validators. Grouped by the xrpl.peer.proposal.trusted span attribute (true/false). A healthy node connected to a well-configured UNL should see a significant portion of trusted proposals. Note: proposals that fail early validation may not have the trusted attribute set.",
|
||||
"description": "Pie chart showing the ratio of proposals received from trusted validators (in our UNL) vs untrusted validators. Grouped by the proposal_trusted span attribute (true/false). A healthy node connected to a well-configured UNL should see a significant portion of trusted proposals. Note: proposals that fail early validation may not have the trusted attribute set.",
|
||||
"type": "piechart",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -108,8 +108,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_peer_proposal_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_peer_proposal_trusted=~\"$proposal_trusted\", span_name=\"peer.proposal.receive\"}[5m]))",
|
||||
"legendFormat": "Trusted = {{xrpl_peer_proposal_trusted}} [{{exported_instance}}]"
|
||||
"expr": "sum by (proposal_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", proposal_trusted=~\"$proposal_trusted\", span_name=\"peer.proposal.receive\"}[5m]))",
|
||||
"legendFormat": "Trusted = {{proposal_trusted}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -121,7 +121,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Validations Trusted vs Untrusted",
|
||||
"description": "Pie chart showing the ratio of validations received from trusted validators (in our UNL) vs untrusted validators. Grouped by the xrpl.peer.validation.trusted span attribute (true/false). Monitoring this helps detect if the node is receiving validations from the expected set of trusted validators. Note: validations that fail early checks may not have the trusted attribute set.",
|
||||
"description": "Pie chart showing the ratio of validations received from trusted validators (in our UNL) vs untrusted validators. Grouped by the validation_trusted span attribute (true/false). Monitoring this helps detect if the node is receiving validations from the expected set of trusted validators. Note: validations that fail early checks may not have the trusted attribute set.",
|
||||
"type": "piechart",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -140,8 +140,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_peer_validation_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_peer_validation_trusted=~\"$validation_trusted\", span_name=\"peer.validation.receive\"}[5m]))",
|
||||
"legendFormat": "Trusted = {{xrpl_peer_validation_trusted}} [{{exported_instance}}]"
|
||||
"expr": "sum by (validation_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", validation_trusted=~\"$validation_trusted\", span_name=\"peer.validation.receive\"}[5m]))",
|
||||
"legendFormat": "Trusted = {{validation_trusted}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -181,7 +181,7 @@
|
||||
"label": "Proposal Trusted",
|
||||
"description": "Filter by proposal trust status (true = from trusted validator)",
|
||||
"type": "query",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.proposal.receive\"}, xrpl_peer_proposal_trusted)",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.proposal.receive\"}, proposal_trusted)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
@@ -201,7 +201,7 @@
|
||||
"label": "Validation Trusted",
|
||||
"description": "Filter by validation trust status (true = from trusted validator)",
|
||||
"type": "query",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.validation.receive\"}, xrpl_peer_validation_trusted)",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.validation.receive\"}, validation_trusted)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"panels": [
|
||||
{
|
||||
"title": "RPC Request Rate by Command",
|
||||
"description": "Per-second rate of RPC command executions, broken down by command name (e.g. server_info, submit). Calculated as rate(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}) over a 5m window, grouped by the xrpl.rpc.command span attribute.",
|
||||
"description": "Per-second rate of RPC command executions, broken down by command name (e.g. server_info, submit). Calculated as rate(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}) over a 5m window, grouped by the command span attribute.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -29,8 +29,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m]))",
|
||||
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
|
||||
"expr": "sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m]))",
|
||||
"legendFormat": "{{command}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -49,7 +49,7 @@
|
||||
},
|
||||
{
|
||||
"title": "RPC Latency P95 by Command",
|
||||
"description": "95th percentile response time for each RPC command. Computed from the spanmetrics duration histogram using histogram_quantile(0.95) over rpc.command.* spans, grouped by xrpl.rpc.command. High values indicate slow commands that may need optimization.",
|
||||
"description": "95th percentile response time for each RPC command. Computed from the spanmetrics duration histogram using histogram_quantile(0.95) over rpc.command.* spans, grouped by command. High values indicate slow commands that may need optimization.",
|
||||
"type": "timeseries",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -68,8 +68,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum by (le, xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
|
||||
"legendFormat": "P95 {{xrpl_rpc_command}} [{{exported_instance}}]"
|
||||
"expr": "histogram_quantile(0.95, sum by (le, command, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
|
||||
"legendFormat": "P95 {{command}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -107,8 +107,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) * 100",
|
||||
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
|
||||
"expr": "sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) * 100",
|
||||
"legendFormat": "{{command}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -158,7 +158,7 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) by (le)",
|
||||
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) by (le)",
|
||||
"legendFormat": "{{le}}",
|
||||
"format": "heatmap"
|
||||
}
|
||||
@@ -185,14 +185,14 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.request\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.request\"}[5m]))",
|
||||
"legendFormat": "rpc.request / Sec [{{exported_instance}}]"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.process\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.process\"}[5m]))",
|
||||
"legendFormat": "rpc.process / Sec [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -231,14 +231,14 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_UNSET\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_UNSET\"}[5m]))",
|
||||
"legendFormat": "Success [{{exported_instance}}]"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m]))",
|
||||
"legendFormat": "Error [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -277,8 +277,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "topk(10, sum by (xrpl_rpc_command, exported_instance) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
|
||||
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
|
||||
"expr": "topk(10, sum by (command, exported_instance) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
|
||||
"legendFormat": "{{command}} [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@@ -309,7 +309,7 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.ws_message\"}[5m]))",
|
||||
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.ws_message\"}[5m]))",
|
||||
"legendFormat": "WS Messages / Sec [{{exported_instance}}]"
|
||||
}
|
||||
],
|
||||
@@ -350,7 +350,7 @@
|
||||
"label": "RPC Command",
|
||||
"description": "Filter by RPC command name (e.g., server_info, submit)",
|
||||
"type": "query",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}, xrpl_rpc_command)",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}, command)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Transaction Path Distribution",
|
||||
"description": "Breakdown of transactions by origin path. The xrpl.tx.local attribute indicates whether the transaction was submitted locally (true) or received from a peer (false). Helps understand the ratio of locally-originated vs relayed transactions.",
|
||||
"description": "Breakdown of transactions by origin path. The local attribute indicates whether the transaction was submitted locally (true) or received from a peer (false). Helps understand the ratio of locally-originated vs relayed transactions.",
|
||||
"type": "piechart",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -121,8 +121,8 @@
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"expr": "sum by (xrpl_tx_local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_tx_local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
|
||||
"legendFormat": "Local = {{xrpl_tx_local}} [{{exported_instance}}]"
|
||||
"expr": "sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
|
||||
"legendFormat": "Local = {{local}} [{{exported_instance}}]"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -282,7 +282,7 @@
|
||||
},
|
||||
{
|
||||
"title": "Transaction Apply Failed Rate",
|
||||
"description": "Rate of tx.apply spans completing with error status, indicating transaction application failures during ledger building. The span records xrpl.ledger.tx_failed as an attribute. Thresholds: green < 0.1/sec, yellow 0.1-1/sec, red > 1/sec. Some failures are normal (e.g. conflicting offers) but sustained high rates may indicate issues.",
|
||||
"description": "Rate of tx.apply spans completing with error status, indicating transaction application failures during ledger building. The span records tx_failed as an attribute. Thresholds: green < 0.1/sec, yellow 0.1-1/sec, red > 1/sec. Some failures are normal (e.g. conflicting offers) but sustained high rates may indicate issues.",
|
||||
"type": "stat",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -358,7 +358,7 @@
|
||||
"label": "TX Origin",
|
||||
"description": "Filter by transaction origin (true = local submit, false = peer relay)",
|
||||
"type": "query",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\"}, xrpl_tx_local)",
|
||||
"query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\"}, local)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
|
||||
@@ -122,6 +122,7 @@ inline constexpr auto ledgerSeq = join(join(seg::xrpl, seg::ledger), makeStr("se
|
||||
inline constexpr auto closeTime = makeStr("close_time");
|
||||
inline constexpr auto closeTimeCorrect = makeStr("close_time_correct");
|
||||
inline constexpr auto closeResolutionMs = makeStr("close_resolution_ms");
|
||||
inline constexpr auto ledgerHash = join(join(seg::xrpl, seg::ledger), makeStr("hash"));
|
||||
} // namespace attr
|
||||
|
||||
// ===== Shared attribute values =============================================
|
||||
|
||||
@@ -82,7 +82,7 @@ buildLedgerImpl(
|
||||
built->header().seq < XRP_LEDGER_EARLIEST_FEES || built->read(keylet::fees()),
|
||||
"xrpl::buildLedgerImpl : valid ledger fees");
|
||||
built->setAccepted(closeTime, closeResolution, closeTimeCorrect);
|
||||
buildSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(built->header().seq));
|
||||
buildSpan.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(built->header().seq));
|
||||
buildSpan.setAttribute(
|
||||
ledger_span::attr::closeTime, static_cast<int64_t>(closeTime.time_since_epoch().count()));
|
||||
buildSpan.setAttribute(ledger_span::attr::closeTimeCorrect, closeTimeCorrect);
|
||||
|
||||
@@ -454,7 +454,7 @@ LedgerMaster::storeLedger(std::shared_ptr<Ledger const> ledger)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::store);
|
||||
span.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
|
||||
span.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(ledger->header().seq));
|
||||
|
||||
bool const validated = ledger->header().validated;
|
||||
// Returns true if we already had the ledger
|
||||
@@ -974,7 +974,7 @@ LedgerMaster::checkAccept(std::shared_ptr<Ledger const> const& ledger)
|
||||
|
||||
using namespace telemetry;
|
||||
auto valSpan = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::validate);
|
||||
valSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
|
||||
valSpan.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(ledger->header().seq));
|
||||
valSpan.setAttribute(ledger_span::attr::validations, static_cast<int64_t>(tvc));
|
||||
|
||||
JLOG(m_journal.info()) << "Advancing accepted ledger to " << ledger->header().seq
|
||||
|
||||
@@ -29,22 +29,17 @@ inline constexpr auto apply = makeStr("apply");
|
||||
// ===== Attribute keys ========================================================
|
||||
|
||||
namespace attr {
|
||||
inline constexpr auto xrplLedger = join(seg::xrpl, seg::ledger);
|
||||
/// Canonical shared constants (defined in SpanNames.h).
|
||||
using ::xrpl::telemetry::attr::closeResolutionMs;
|
||||
using ::xrpl::telemetry::attr::closeTime;
|
||||
using ::xrpl::telemetry::attr::closeTimeCorrect;
|
||||
using ::xrpl::telemetry::attr::ledgerHash;
|
||||
using ::xrpl::telemetry::attr::ledgerSeq;
|
||||
|
||||
/// "xrpl.ledger.seq"
|
||||
inline constexpr auto seq = join(xrplLedger, makeStr("seq"));
|
||||
/// "xrpl.ledger.close_time"
|
||||
inline constexpr auto closeTime = join(xrplLedger, makeStr("close_time"));
|
||||
/// "xrpl.ledger.close_time_correct"
|
||||
inline constexpr auto closeTimeCorrect = join(xrplLedger, makeStr("close_time_correct"));
|
||||
/// "xrpl.ledger.close_resolution_ms"
|
||||
inline constexpr auto closeResolutionMs = join(xrplLedger, makeStr("close_resolution_ms"));
|
||||
/// "xrpl.ledger.tx_count"
|
||||
inline constexpr auto txCount = join(xrplLedger, makeStr("tx_count"));
|
||||
/// "xrpl.ledger.tx_failed"
|
||||
inline constexpr auto txFailed = join(xrplLedger, makeStr("tx_failed"));
|
||||
/// "xrpl.ledger.validations"
|
||||
inline constexpr auto validations = join(xrplLedger, makeStr("validations"));
|
||||
/// Domain-owned bare attrs.
|
||||
inline constexpr auto txCount = makeStr("tx_count");
|
||||
inline constexpr auto txFailed = makeStr("tx_failed");
|
||||
inline constexpr auto validations = makeStr("validations");
|
||||
} // namespace attr
|
||||
|
||||
} // namespace xrpl::telemetry::ledger_span
|
||||
|
||||
@@ -1875,7 +1875,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span = SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::proposalReceive);
|
||||
span.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
|
||||
span.setAttribute(peer_span::attr::peerId, static_cast<int64_t>(id_));
|
||||
|
||||
protocol::TMProposeSet const& set = *m;
|
||||
|
||||
@@ -2484,7 +2484,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
|
||||
using namespace telemetry;
|
||||
auto valSpan =
|
||||
SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::validationReceive);
|
||||
valSpan.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
|
||||
valSpan.setAttribute(peer_span::attr::peerId, static_cast<int64_t>(id_));
|
||||
|
||||
if (m->validation().size() < 50)
|
||||
{
|
||||
@@ -2508,8 +2508,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
|
||||
false);
|
||||
val->setSeen(closeTime);
|
||||
}
|
||||
valSpan.setAttribute(
|
||||
peer_span::attr::validationLedgerHash, to_string(val->getLedgerHash()).c_str());
|
||||
valSpan.setAttribute(peer_span::attr::ledgerHash, to_string(val->getLedgerHash()).c_str());
|
||||
valSpan.setAttribute(peer_span::attr::validationFull, val->isFull());
|
||||
|
||||
if (!isCurrent(
|
||||
|
||||
@@ -25,22 +25,14 @@ inline constexpr auto validationReceive = makeStr("validation.receive");
|
||||
// ===== Attribute keys ========================================================
|
||||
|
||||
namespace attr {
|
||||
inline constexpr auto xrplPeer = join(seg::xrpl, seg::peer);
|
||||
/// Canonical shared constants (defined in SpanNames.h).
|
||||
using ::xrpl::telemetry::attr::ledgerHash;
|
||||
using ::xrpl::telemetry::attr::peerId;
|
||||
|
||||
/// "xrpl.peer.id"
|
||||
inline constexpr auto id = join(xrplPeer, makeStr("id"));
|
||||
/// "xrpl.peer.proposal.trusted"
|
||||
inline constexpr auto proposalTrusted =
|
||||
join(join(xrplPeer, makeStr("proposal")), makeStr("trusted"));
|
||||
|
||||
/// "xrpl.peer.validation.ledger_hash"
|
||||
inline constexpr auto validationLedgerHash =
|
||||
join(join(xrplPeer, makeStr("validation")), makeStr("ledger_hash"));
|
||||
/// "xrpl.peer.validation.full"
|
||||
inline constexpr auto validationFull = join(join(xrplPeer, makeStr("validation")), makeStr("full"));
|
||||
/// "xrpl.peer.validation.trusted"
|
||||
inline constexpr auto validationTrusted =
|
||||
join(join(xrplPeer, makeStr("validation")), makeStr("trusted"));
|
||||
/// Domain-owned bare attrs.
|
||||
inline constexpr auto proposalTrusted = makeStr("proposal_trusted");
|
||||
inline constexpr auto validationFull = makeStr("validation_full");
|
||||
inline constexpr auto validationTrusted = makeStr("validation_trusted");
|
||||
} // namespace attr
|
||||
|
||||
} // namespace xrpl::telemetry::peer_span
|
||||
|
||||
Reference in New Issue
Block a user