Merge branch 'pratik/otel-phase8-log-correlation' into pratik/otel-phase9-metric-gap-fill

This commit is contained in:
Pratik Mankawde
2026-05-13 16:17:12 +01:00
27 changed files with 390 additions and 341 deletions

View File

@@ -10,7 +10,7 @@
"panels": [
{
"title": "Consensus Round Duration",
"description": "p95 and p50 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp:395) measures the time to process an accepted ledger including transaction application and state finalization. The span carries xrpl.consensus.proposers and xrpl.consensus.round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
"description": "p95 and p50 duration of consensus accept rounds. The consensus.accept span (RCLConsensus.cpp:395) measures the time to process an accepted ledger including transaction application and state finalization. The span carries proposers and round_time_ms attributes. Normal range is 3-6 seconds on mainnet.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -95,7 +95,7 @@
},
{
"title": "Ledger Close Duration",
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp:282) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.consensus.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
"description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp:282) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -134,7 +134,7 @@
},
{
"title": "Validation Send Rate",
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp:753). Each validation confirms the node has fully validated a ledger. The span carries xrpl.consensus.ledger.seq and xrpl.consensus.proposing. Should closely track the ledger close rate when the node is healthy.",
"description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp:753). Each validation confirms the node has fully validated a ledger. The span carries xrpl.ledger.seq and proposing. Should closely track the ledger close rate when the node is healthy.",
"type": "stat",
"gridPos": {
"h": 8,
@@ -206,7 +206,7 @@
},
{
"title": "Close Time Agreement",
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on xrpl.consensus.close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
"description": "Rate of close time agreement vs disagreement across consensus rounds. Based on close_time_correct attribute (true = validators agreed, false = agreed to disagree per avCT_CONSENSUS_PCT).",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -219,8 +219,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_consensus_close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
"legendFormat": "Close Time Correct={{xrpl_consensus_close_time_correct}} [{{exported_instance}}]"
"expr": "sum by (close_time_correct, exported_instance) (rate(traces_span_metrics_calls_total{span_name=\"consensus.accept.apply\", xrpl_consensus_mode=~\"$consensus_mode\", exported_instance=~\"$node\"}[$__rate_interval]))",
"legendFormat": "Close Time Correct={{close_time_correct}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -400,7 +400,7 @@
},
{
"title": "Close Time: Raw Proposals (Per Node)",
"description": "Each node's raw proposed close time (xrpl.consensus.close_time_self) \u2014 the unrounded wall clock value at the moment the node closed its ledger. Compare across nodes to see clock drift.",
"description": "Each node's raw proposed close time (close_time_self) \u2014 the unrounded wall clock value at the moment the node closed its ledger. Compare across nodes to see clock drift.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -436,14 +436,14 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time_self)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time_self)",
"refId": "A"
}
]
},
{
"title": "Close Time: Effective / Quantized",
"description": "The consensus-agreed close time after rounding to the current resolution bin (xrpl.consensus.close_time). This is the value written to the ledger header. All nodes in agreement produce the same value.",
"description": "The consensus-agreed close time after rounding to the current resolution bin (close_time). This is the value written to the ledger header. All nodes in agreement produce the same value.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -479,14 +479,14 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time)",
"refId": "A"
}
]
},
{
"title": "Close Time Vote Bins & Resolution",
"description": "Number of distinct close time vote bins (xrpl.consensus.close_time_vote_bins) and the bin size / resolution in ms (xrpl.consensus.close_resolution_ms). More bins = more clock disagreement. Resolution adapts: finer (10s) when validators agree, coarser (120s) when they disagree.",
"description": "Number of distinct close time vote bins (close_time_vote_bins) and the bin size / resolution in ms (close_resolution_ms). More bins = more clock disagreement. Resolution adapts: finer (10s) when validators agree, coarser (120s) when they disagree.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -555,7 +555,7 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time_vote_bins)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time_vote_bins)",
"refId": "A"
},
{
@@ -563,14 +563,14 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_resolution_ms)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_resolution_ms)",
"refId": "B"
}
]
},
{
"title": "Close Time Resolution Direction",
"description": "Whether close time resolution increased (coarser bins, more disagreement), decreased (finer bins, better agreement), or stayed unchanged relative to the previous ledger. Based on xrpl.consensus.resolution_direction attribute.",
"description": "Whether close time resolution increased (coarser bins, more disagreement), decreased (finer bins, better agreement), or stayed unchanged relative to the previous ledger. Based on resolution_direction attribute.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -606,7 +606,7 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\" && span.xrpl.consensus.resolution_direction=~\"$resolution_direction\"} | select(span.xrpl.consensus.resolution_direction)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\" && span.resolution_direction=~\"$resolution_direction\"} | select(span.resolution_direction)",
"refId": "A"
}
]
@@ -650,7 +650,7 @@
"type": "tempo"
},
"queryType": "traceql",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.xrpl.consensus.close_time_correct=~\"$close_time_correct\"} | select(span.xrpl.consensus.close_time, span.xrpl.consensus.close_time_vote_bins)",
"query": "{name=\"consensus.accept.apply\" && resource.service.instance.id=~\"$node\" && span.close_time_correct=~\"$close_time_correct\"} | select(span.close_time, span.close_time_vote_bins)",
"refId": "A"
}
]

View File

@@ -88,7 +88,7 @@
},
{
"title": "Ledger Validation Rate",
"description": "Rate at which ledgers pass the validation threshold and are accepted as fully validated. The ledger.validate span (LedgerMaster.cpp:915) fires in checkAccept() only after the ledger receives sufficient trusted validations (>= quorum). Records xrpl.ledger.seq and xrpl.ledger.validations (the number of validations received).",
"description": "Rate at which ledgers pass the validation threshold and are accepted as fully validated. The ledger.validate span (LedgerMaster.cpp:915) fires in checkAccept() only after the ledger receives sufficient trusted validations (>= quorum). Records xrpl.ledger.seq and validations (the number of validations received).",
"type": "stat",
"gridPos": {
"h": 8,
@@ -156,7 +156,7 @@
},
{
"title": "Transaction Apply Duration",
"description": "p95 and p50 duration of applying the consensus transaction set during ledger building. The tx.apply span (BuildLedger.cpp:88) wraps applyTransactions() which iterates through the CanonicalTXSet with multiple retry passes. Records xrpl.ledger.tx_count (successful) and xrpl.ledger.tx_failed (failed) as attributes.",
"description": "p95 and p50 duration of applying the consensus transaction set during ledger building. The tx.apply span (BuildLedger.cpp:88) wraps applyTransactions() which iterates through the CanonicalTXSet with multiple retry passes. Records tx_count (successful) and tx_failed (failed) as attributes.",
"type": "timeseries",
"gridPos": {
"h": 8,

View File

@@ -11,7 +11,7 @@
"panels": [
{
"title": "Peer Proposal Receive Rate",
"description": "Rate of consensus proposals received from network peers. The peer.proposal.receive span (PeerImp.cpp:1667) fires in onMessage(TMProposeSet) for each incoming proposal. Records xrpl.peer.id (sending peer) and xrpl.peer.proposal.trusted (whether the proposer is in our UNL). Requires trace_peer=1 in the telemetry config.",
"description": "Rate of consensus proposals received from network peers. The peer.proposal.receive span (PeerImp.cpp:1667) fires in onMessage(TMProposeSet) for each incoming proposal. Records xrpl.peer.id (sending peer) and proposal_trusted (whether the proposer is in our UNL). Requires trace_peer=1 in the telemetry config.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -50,7 +50,7 @@
},
{
"title": "Peer Validation Receive Rate",
"description": "Rate of ledger validations received from network peers. The peer.validation.receive span (PeerImp.cpp:2264) fires in onMessage(TMValidation) for each incoming validation message. Records xrpl.peer.id (sending peer) and xrpl.peer.validation.trusted (whether the validator is trusted). Requires trace_peer=1 in the telemetry config.",
"description": "Rate of ledger validations received from network peers. The peer.validation.receive span (PeerImp.cpp:2264) fires in onMessage(TMValidation) for each incoming validation message. Records xrpl.peer.id (sending peer) and validation_trusted (whether the validator is trusted). Requires trace_peer=1 in the telemetry config.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -89,7 +89,7 @@
},
{
"title": "Proposals Trusted vs Untrusted",
"description": "Pie chart showing the ratio of proposals received from trusted validators (in our UNL) vs untrusted validators. Grouped by the xrpl.peer.proposal.trusted span attribute (true/false). A healthy node connected to a well-configured UNL should see a significant portion of trusted proposals. Note: proposals that fail early validation may not have the trusted attribute set.",
"description": "Pie chart showing the ratio of proposals received from trusted validators (in our UNL) vs untrusted validators. Grouped by the proposal_trusted span attribute (true/false). A healthy node connected to a well-configured UNL should see a significant portion of trusted proposals. Note: proposals that fail early validation may not have the trusted attribute set.",
"type": "piechart",
"gridPos": {
"h": 8,
@@ -108,8 +108,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_peer_proposal_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_peer_proposal_trusted=~\"$proposal_trusted\", span_name=\"peer.proposal.receive\"}[5m]))",
"legendFormat": "Trusted = {{xrpl_peer_proposal_trusted}} [{{exported_instance}}]"
"expr": "sum by (proposal_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", proposal_trusted=~\"$proposal_trusted\", span_name=\"peer.proposal.receive\"}[5m]))",
"legendFormat": "Trusted = {{proposal_trusted}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -121,7 +121,7 @@
},
{
"title": "Validations Trusted vs Untrusted",
"description": "Pie chart showing the ratio of validations received from trusted validators (in our UNL) vs untrusted validators. Grouped by the xrpl.peer.validation.trusted span attribute (true/false). Monitoring this helps detect if the node is receiving validations from the expected set of trusted validators. Note: validations that fail early checks may not have the trusted attribute set.",
"description": "Pie chart showing the ratio of validations received from trusted validators (in our UNL) vs untrusted validators. Grouped by the validation_trusted span attribute (true/false). Monitoring this helps detect if the node is receiving validations from the expected set of trusted validators. Note: validations that fail early checks may not have the trusted attribute set.",
"type": "piechart",
"gridPos": {
"h": 8,
@@ -140,8 +140,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_peer_validation_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_peer_validation_trusted=~\"$validation_trusted\", span_name=\"peer.validation.receive\"}[5m]))",
"legendFormat": "Trusted = {{xrpl_peer_validation_trusted}} [{{exported_instance}}]"
"expr": "sum by (validation_trusted, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", validation_trusted=~\"$validation_trusted\", span_name=\"peer.validation.receive\"}[5m]))",
"legendFormat": "Trusted = {{validation_trusted}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -181,7 +181,7 @@
"label": "Proposal Trusted",
"description": "Filter by proposal trust status (true = from trusted validator)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.proposal.receive\"}, xrpl_peer_proposal_trusted)",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.proposal.receive\"}, proposal_trusted)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"
@@ -201,7 +201,7 @@
"label": "Validation Trusted",
"description": "Filter by validation trust status (true = from trusted validator)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.validation.receive\"}, xrpl_peer_validation_trusted)",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"peer.validation.receive\"}, validation_trusted)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"

View File

@@ -10,7 +10,7 @@
"panels": [
{
"title": "RPC Request Rate by Command",
"description": "Per-second rate of RPC command executions, broken down by command name (e.g. server_info, submit). Calculated as rate(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}) over a 5m window, grouped by the xrpl.rpc.command span attribute.",
"description": "Per-second rate of RPC command executions, broken down by command name (e.g. server_info, submit). Calculated as rate(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}) over a 5m window, grouped by the command span attribute.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -29,8 +29,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m]))",
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m]))",
"legendFormat": "{{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -49,7 +49,7 @@
},
{
"title": "RPC Latency P95 by Command",
"description": "95th percentile response time for each RPC command. Computed from the spanmetrics duration histogram using histogram_quantile(0.95) over rpc.command.* spans, grouped by xrpl.rpc.command. High values indicate slow commands that may need optimization.",
"description": "95th percentile response time for each RPC command. Computed from the spanmetrics duration histogram using histogram_quantile(0.95) over rpc.command.* spans, grouped by command. High values indicate slow commands that may need optimization.",
"type": "timeseries",
"gridPos": {
"h": 8,
@@ -68,8 +68,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "histogram_quantile(0.95, sum by (le, xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "P95 {{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "histogram_quantile(0.95, sum by (le, command, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "P95 {{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -107,8 +107,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / sum by (xrpl_rpc_command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) * 100",
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / sum by (command, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) * 100",
"legendFormat": "{{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -158,7 +158,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) by (le)",
"expr": "sum(increase(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])) by (le)",
"legendFormat": "{{le}}",
"format": "heatmap"
}
@@ -185,14 +185,14 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.request\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.request\"}[5m]))",
"legendFormat": "rpc.request / Sec [{{exported_instance}}]"
},
{
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.process\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.process\"}[5m]))",
"legendFormat": "rpc.process / Sec [{{exported_instance}}]"
}
],
@@ -231,14 +231,14 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_UNSET\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_UNSET\"}[5m]))",
"legendFormat": "Success [{{exported_instance}}]"
},
{
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\", status_code=\"STATUS_CODE_ERROR\"}[5m]))",
"legendFormat": "Error [{{exported_instance}}]"
}
],
@@ -277,8 +277,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "topk(10, sum by (xrpl_rpc_command, exported_instance) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "{{xrpl_rpc_command}} [{{exported_instance}}]"
"expr": "topk(10, sum by (command, exported_instance) (increase(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=~\"rpc.command.*\"}[5m])))",
"legendFormat": "{{command}} [{{exported_instance}}]"
}
],
"fieldConfig": {
@@ -309,7 +309,7 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_rpc_command=~\"$command\", span_name=\"rpc.ws_message\"}[5m]))",
"expr": "sum by (exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", command=~\"$command\", span_name=\"rpc.ws_message\"}[5m]))",
"legendFormat": "WS Messages / Sec [{{exported_instance}}]"
}
],
@@ -350,7 +350,7 @@
"label": "RPC Command",
"description": "Filter by RPC command name (e.g., server_info, submit)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}, xrpl_rpc_command)",
"query": "label_values(traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}, command)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"

View File

@@ -102,7 +102,7 @@
},
{
"title": "Transaction Path Distribution",
"description": "Breakdown of transactions by origin path. The xrpl.tx.local attribute indicates whether the transaction was submitted locally (true) or received from a peer (false). Helps understand the ratio of locally-originated vs relayed transactions.",
"description": "Breakdown of transactions by origin path. The local attribute indicates whether the transaction was submitted locally (true) or received from a peer (false). Helps understand the ratio of locally-originated vs relayed transactions.",
"type": "piechart",
"gridPos": {
"h": 8,
@@ -121,8 +121,8 @@
"datasource": {
"type": "prometheus"
},
"expr": "sum by (xrpl_tx_local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", xrpl_tx_local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
"legendFormat": "Local = {{xrpl_tx_local}} [{{exported_instance}}]"
"expr": "sum by (local, exported_instance) (rate(traces_span_metrics_calls_total{exported_instance=~\"$node\", local=~\"$tx_origin\", span_name=\"tx.process\"}[5m]))",
"legendFormat": "Local = {{local}} [{{exported_instance}}]"
}
]
},
@@ -282,7 +282,7 @@
},
{
"title": "Transaction Apply Failed Rate",
"description": "Rate of tx.apply spans completing with error status, indicating transaction application failures during ledger building. The span records xrpl.ledger.tx_failed as an attribute. Thresholds: green < 0.1/sec, yellow 0.1-1/sec, red > 1/sec. Some failures are normal (e.g. conflicting offers) but sustained high rates may indicate issues.",
"description": "Rate of tx.apply spans completing with error status, indicating transaction application failures during ledger building. The span records tx_failed as an attribute. Thresholds: green < 0.1/sec, yellow 0.1-1/sec, red > 1/sec. Some failures are normal (e.g. conflicting offers) but sustained high rates may indicate issues.",
"type": "stat",
"gridPos": {
"h": 8,
@@ -358,7 +358,7 @@
"label": "TX Origin",
"description": "Filter by transaction origin (true = local submit, false = peer relay)",
"type": "query",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\"}, xrpl_tx_local)",
"query": "label_values(traces_span_metrics_calls_total{span_name=\"tx.process\"}, local)",
"datasource": {
"type": "prometheus",
"uid": "prometheus"

View File

@@ -185,15 +185,15 @@ Traced RPC operations produce a span hierarchy like:
```
rpc.request
└── rpc.command.server_info (xrpl.rpc.command=server_info, xrpl.rpc.status=success)
└── rpc.command.server_info (command=server_info, rpc_status=success)
```
Each span includes attributes:
- `xrpl.rpc.command` — the RPC method name
- `xrpl.rpc.version` — API version
- `xrpl.rpc.role` — `admin` or `user`
- `xrpl.rpc.status` — `success` or `error`
- `command` — the RPC method name
- `version` — API version
- `rpc_role` — `admin` or `user`
- `rpc_status` — `success` or `error`
## Running Tests

View File

@@ -65,71 +65,71 @@ All spans instrumented in xrpld, grouped by subsystem:
### RPC Spans (Phase 2)
| Span Name | Source File | Attributes | Description |
| -------------------- | --------------------- | ---------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------- |
| `rpc.request` | ServerHandler.cpp:271 | — | Top-level HTTP RPC request |
| `rpc.process` | ServerHandler.cpp:573 | — | RPC processing (child of rpc.request) |
| `rpc.ws_message` | ServerHandler.cpp:384 | — | WebSocket RPC message |
| `rpc.command.<name>` | RPCHandler.cpp:161 | `xrpl.rpc.command`, `xrpl.rpc.version`, `xrpl.rpc.role`, `xrpl.rpc.status`, `xrpl.rpc.duration_ms`, `xrpl.rpc.error_message` | Per-command span (e.g., `rpc.command.server_info`) |
| Span Name | Source File | Attributes | Description |
| -------------------- | --------------------- | ------------------------------------------------------------------------------ | -------------------------------------------------- |
| `rpc.request` | ServerHandler.cpp:271 | — | Top-level HTTP RPC request |
| `rpc.process` | ServerHandler.cpp:573 | — | RPC processing (child of rpc.request) |
| `rpc.ws_message` | ServerHandler.cpp:384 | — | WebSocket RPC message |
| `rpc.command.<name>` | RPCHandler.cpp:161 | `command`, `version`, `rpc_role`, `rpc_status`, `duration_ms`, `error_message` | Per-command span (e.g., `rpc.command.server_info`) |
### Transaction Spans (Phase 3)
| Span Name | Source File | Attributes | Description |
| ------------ | ------------------- | ------------------------------------------------------------------------------------------- | ------------------------------------- |
| `tx.process` | NetworkOPs.cpp:1227 | `xrpl.tx.hash`, `xrpl.tx.local`, `xrpl.tx.path` | Transaction submission and processing |
| `tx.receive` | PeerImp.cpp:1273 | `xrpl.peer.id`, `xrpl.tx.hash`, `xrpl.peer.version`, `xrpl.tx.suppressed`, `xrpl.tx.status` | Transaction received from peer relay |
| `tx.apply` | BuildLedger.cpp:88 | `xrpl.ledger.seq`, `xrpl.ledger.tx_count`, `xrpl.ledger.tx_failed` | Transaction set applied per ledger |
| Span Name | Source File | Attributes | Description |
| ------------ | ------------------- | ------------------------------------------------------------------------- | ------------------------------------- |
| `tx.process` | NetworkOPs.cpp:1227 | `xrpl.tx.hash`, `local`, `path` | Transaction submission and processing |
| `tx.receive` | PeerImp.cpp:1273 | `xrpl.peer.id`, `xrpl.tx.hash`, `peer_version`, `suppressed`, `tx_status` | Transaction received from peer relay |
| `tx.apply` | BuildLedger.cpp:88 | `xrpl.ledger.seq`, `tx_count`, `tx_failed` | Transaction set applied per ledger |
### Transaction Queue Spans (Phase 3)
| Span Name | Source File | Attributes | Description |
| ------------------ | ----------- | --------------------------------------------------------------------- | -------------------------------------------------- |
| `txq.enqueue` | TxQ.cpp | `xrpl.txq.tx_hash` | Transaction enqueue decision (child of tx.process) |
| `txq.apply_direct` | TxQ.cpp | -- | Direct apply attempt (bypassing queue) |
| `txq.batch_clear` | TxQ.cpp | -- | Batch clear of queued transactions for an account |
| `txq.accept` | TxQ.cpp | `xrpl.txq.queue_size` | Ledger-close accept loop over queued transactions |
| `txq.accept_tx` | TxQ.cpp | `xrpl.txq.tx_hash`, `xrpl.txq.retries_remaining`, `xrpl.txq.ter_code` | Per-transaction apply during accept |
| `txq.cleanup` | TxQ.cpp | `xrpl.txq.ledger_seq` | Post-close cleanup of expired queue entries |
| Span Name | Source File | Attributes | Description |
| ------------------ | ----------- | ----------------------------------------------- | -------------------------------------------------- |
| `txq.enqueue` | TxQ.cpp | `xrpl.tx.hash` | Transaction enqueue decision (child of tx.process) |
| `txq.apply_direct` | TxQ.cpp | -- | Direct apply attempt (bypassing queue) |
| `txq.batch_clear` | TxQ.cpp | -- | Batch clear of queued transactions for an account |
| `txq.accept` | TxQ.cpp | `queue_size` | Ledger-close accept loop over queued transactions |
| `txq.accept_tx` | TxQ.cpp | `xrpl.tx.hash`, `retries_remaining`, `ter_code` | Per-transaction apply during accept |
| `txq.cleanup` | TxQ.cpp | `xrpl.ledger.seq` | Post-close cleanup of expired queue entries |
### Consensus Spans (Phase 4)
| Span Name | Source File | Attributes | Description |
| ------------------------------ | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------- |
| `consensus.round` | RCLConsensus.cpp | `xrpl.consensus.ledger_id`, `xrpl.consensus.ledger.seq`, `xrpl.consensus.mode`, `xrpl.consensus.trace_strategy`, `xrpl.consensus.round_id` | Root span for a consensus round (deterministic or random trace ID) |
| `consensus.phase.open` | Consensus.h | -- | Open phase duration (child of round) |
| `consensus.proposal.send` | RCLConsensus.cpp | `xrpl.consensus.round` | Consensus proposal broadcast |
| `consensus.ledger_close` | RCLConsensus.cpp | `xrpl.consensus.ledger.seq`, `xrpl.consensus.mode` | Ledger close event |
| `consensus.establish` | Consensus.h | `xrpl.consensus.converge_percent`, `xrpl.consensus.establish_count`, `xrpl.consensus.proposers` | Establish phase duration (child of round) |
| `consensus.update_positions` | Consensus.h | `xrpl.consensus.converge_percent`, `xrpl.consensus.proposers`, `xrpl.consensus.disputes_count` | Position update and dispute resolution (see Events below) |
| `consensus.check` | Consensus.h | `xrpl.consensus.agree_count`, `xrpl.consensus.disagree_count`, `xrpl.consensus.converge_percent`, `xrpl.consensus.have_close_time_consensus`, `xrpl.consensus.threshold_percent`, `xrpl.consensus.result` | Consensus threshold check |
| `consensus.accept` | RCLConsensus.cpp | `xrpl.consensus.proposers`, `xrpl.consensus.round_time_ms`, `xrpl.consensus.quorum` | Ledger accepted by consensus |
| `consensus.accept.apply` | RCLConsensus.cpp | `xrpl.consensus.ledger.seq`, `xrpl.consensus.close_time`, `xrpl.consensus.close_time_correct`, `xrpl.consensus.close_resolution_ms`, `xrpl.consensus.state`, `xrpl.consensus.proposing`, `xrpl.consensus.round_time_ms`, `xrpl.consensus.parent_close_time`, `xrpl.consensus.close_time_self`, `xrpl.consensus.close_time_vote_bins`, `xrpl.consensus.resolution_direction`, `xrpl.consensus.tx_count` | Ledger application with close time details (see Events below) |
| `consensus.validation.send` | RCLConsensus.cpp | `xrpl.consensus.ledger.seq`, `xrpl.consensus.proposing` | Validation sent after accept (follows-from link) |
| `consensus.mode_change` | RCLConsensus.cpp | `xrpl.consensus.mode.old`, `xrpl.consensus.mode.new` | Consensus mode transition |
| `consensus.proposal.receive` | PeerImp.cpp | `xrpl.consensus.trusted`, `xrpl.consensus.round` | Proposal received from peer (extracts parent context from TraceContext when present; falls back to standalone span for older peers) |
| `consensus.validation.receive` | PeerImp.cpp | `xrpl.consensus.trusted`, `xrpl.consensus.ledger.seq` | Validation received from peer (extracts parent context from TraceContext when present; falls back to standalone span for older peers) |
| Span Name | Source File | Attributes | Description |
| ------------------------------ | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
| `consensus.round` | RCLConsensus.cpp | `xrpl.consensus.ledger_id`, `xrpl.ledger.seq`, `xrpl.consensus.mode`, `trace_strategy`, `xrpl.consensus.round_id` | Root span for a consensus round (deterministic or random trace ID) |
| `consensus.phase.open` | Consensus.h | -- | Open phase duration (child of round) |
| `consensus.proposal.send` | RCLConsensus.cpp | `xrpl.consensus.round` | Consensus proposal broadcast |
| `consensus.ledger_close` | RCLConsensus.cpp | `xrpl.ledger.seq`, `xrpl.consensus.mode` | Ledger close event |
| `consensus.establish` | Consensus.h | `converge_percent`, `establish_count`, `proposers` | Establish phase duration (child of round) |
| `consensus.update_positions` | Consensus.h | `converge_percent`, `proposers`, `disputes_count` | Position update and dispute resolution (see Events below) |
| `consensus.check` | Consensus.h | `agree_count`, `disagree_count`, `converge_percent`, `have_close_time_consensus`, `threshold_percent`, `consensus_result` | Consensus threshold check |
| `consensus.accept` | RCLConsensus.cpp | `proposers`, `round_time_ms`, `quorum` | Ledger accepted by consensus |
| `consensus.accept.apply` | RCLConsensus.cpp | `xrpl.ledger.seq`, `close_time`, `close_time_correct`, `close_resolution_ms`, `consensus_state`, `proposing`, `round_time_ms`, `parent_close_time`, `close_time_self`, `close_time_vote_bins`, `resolution_direction`, `tx_count` | Ledger application with close time details (see Events below) |
| `consensus.validation.send` | RCLConsensus.cpp | `xrpl.ledger.seq`, `proposing` | Validation sent after accept (follows-from link) |
| `consensus.mode_change` | RCLConsensus.cpp | `mode_old`, `mode_new` | Consensus mode transition |
| `consensus.proposal.receive` | PeerImp.cpp | `trusted`, `xrpl.consensus.round` | Proposal received from peer (extracts parent context from TraceContext when present; falls back to standalone span for older peers) |
| `consensus.validation.receive` | PeerImp.cpp | `trusted`, `xrpl.ledger.seq` | Validation received from peer (extracts parent context from TraceContext when present; falls back to standalone span for older peers) |
#### Consensus Span Events
| Parent Span | Event Name | Event Attributes | Description |
| ---------------------------- | ----------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------- |
| `consensus.update_positions` | `dispute.resolve` | `xrpl.tx.id`, `xrpl.dispute.our_vote`, `xrpl.dispute.yays`, `xrpl.dispute.nays` | Emitted per dispute when votes are tallied |
| `consensus.accept.apply` | `tx.included` | `xrpl.tx.id` | Emitted per transaction included in the accepted ledger |
| Parent Span | Event Name | Event Attributes | Description |
| ---------------------------- | ----------------- | ---------------------------------------------------------------- | ------------------------------------------------------- |
| `consensus.update_positions` | `dispute.resolve` | `xrpl.tx.id`, `dispute_our_vote`, `dispute_yays`, `dispute_nays` | Emitted per dispute when votes are tallied |
| `consensus.accept.apply` | `tx.included` | `xrpl.tx.id` | Emitted per transaction included in the accepted ledger |
#### Close Time Queries (Tempo TraceQL)
```
# Find rounds where validators disagreed on close time
{name="consensus.accept.apply" && span.xrpl.consensus.close_time_correct = false}
{name="consensus.accept.apply" && span.close_time_correct = false}
# Find consensus failures (moved_on)
{name="consensus.accept.apply" && span.xrpl.consensus.state = "moved_on"}
{name="consensus.accept.apply" && span.consensus_state = "moved_on"}
# Find slow ledger applications (>5s)
{name="consensus.accept.apply" && duration > 5s}
# Find specific ledger's consensus details
{name="consensus.accept.apply" && span.xrpl.consensus.ledger.seq = 92345678}
{name="consensus.accept.apply" && span.xrpl.ledger.seq = 92345678}
# Find all spans in a consensus round (deterministic trace strategy)
{name="consensus.round" && span.xrpl.consensus.round_id = "<round_id>"}
@@ -140,18 +140,18 @@ All spans instrumented in xrpld, grouped by subsystem:
### Ledger Spans (Phase 5)
| Span Name | Source File | Attributes | Description |
| ----------------- | -------------------- | ------------------------------------------------------------------ | ----------------------------- |
| `ledger.build` | BuildLedger.cpp:31 | `xrpl.ledger.seq`, `xrpl.ledger.tx_count`, `xrpl.ledger.tx_failed` | Ledger build during consensus |
| `ledger.validate` | LedgerMaster.cpp:915 | `xrpl.ledger.seq`, `xrpl.ledger.validations` | Ledger promoted to validated |
| `ledger.store` | LedgerMaster.cpp:409 | `xrpl.ledger.seq` | Ledger stored in history |
| Span Name | Source File | Attributes | Description |
| ----------------- | -------------------- | ------------------------------------------ | ----------------------------- |
| `ledger.build` | BuildLedger.cpp:31 | `xrpl.ledger.seq`, `tx_count`, `tx_failed` | Ledger build during consensus |
| `ledger.validate` | LedgerMaster.cpp:915 | `xrpl.ledger.seq`, `validations` | Ledger promoted to validated |
| `ledger.store` | LedgerMaster.cpp:409 | `xrpl.ledger.seq` | Ledger stored in history |
### Peer Spans (Phase 5)
| Span Name | Source File | Attributes | Description |
| ------------------------- | ---------------- | ---------------------------------------------- | ----------------------------- |
| `peer.proposal.receive` | PeerImp.cpp:1667 | `xrpl.peer.id`, `xrpl.peer.proposal.trusted` | Proposal received from peer |
| `peer.validation.receive` | PeerImp.cpp:2264 | `xrpl.peer.id`, `xrpl.peer.validation.trusted` | Validation received from peer |
| Span Name | Source File | Attributes | Description |
| ------------------------- | ---------------- | ------------------------------------ | ----------------------------- |
| `peer.proposal.receive` | PeerImp.cpp:1667 | `xrpl.peer.id`, `proposal_trusted` | Proposal received from peer |
| `peer.validation.receive` | PeerImp.cpp:2264 | `xrpl.peer.id`, `validation_trusted` | Validation received from peer |
## Cross-Node Trace Propagation
@@ -260,14 +260,14 @@ Every metric carries these standard labels:
Additionally, span attributes configured as dimensions in the collector become metric labels (dots → underscores):
| Span Attribute | Metric Label | Applies To |
| ------------------------------ | ------------------------------ | ------------------------------- |
| `xrpl.rpc.command` | `xrpl_rpc_command` | `rpc.command.*` spans |
| `xrpl.rpc.status` | `xrpl_rpc_status` | `rpc.command.*` spans |
| `xrpl.consensus.mode` | `xrpl_consensus_mode` | `consensus.ledger_close` spans |
| `xrpl.tx.local` | `xrpl_tx_local` | `tx.process` spans |
| `xrpl.peer.proposal.trusted` | `xrpl_peer_proposal_trusted` | `peer.proposal.receive` spans |
| `xrpl.peer.validation.trusted` | `xrpl_peer_validation_trusted` | `peer.validation.receive` spans |
| Span Attribute | Metric Label | Applies To |
| --------------------- | --------------------- | ------------------------------- |
| `command` | `command` | `rpc.command.*` spans |
| `rpc_status` | `rpc_status` | `rpc.command.*` spans |
| `xrpl.consensus.mode` | `xrpl_consensus_mode` | `consensus.ledger_close` spans |
| `local` | `local` | `tx.process` spans |
| `proposal_trusted` | `proposal_trusted` | `peer.proposal.receive` spans |
| `validation_trusted` | `validation_trusted` | `peer.validation.receive` spans |
### Histogram Buckets

View File

@@ -53,7 +53,7 @@
auto span = SpanGuard::span(
TraceCategory::Rpc, rpc_span::prefix::command, "submit");
span.setAttribute(rpc_span::attr::command, "submit");
span.setAttribute(rpc_span::attr::status, rpc_span::val::success);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
// span ended automatically on scope exit
@endcode
@@ -86,7 +86,7 @@
TraceCategory::Rpc, rpc_span::prefix::rpc, "request");
if (span) {
// expensive attribute computation only when active
span.setAttribute(rpc_span::attr::payloadSize, computeSize());
span.setAttribute(rpc_span::attr::requestPayloadSize, computeSize());
}
@endcode

View File

@@ -16,9 +16,12 @@
* concatenation support. boost::static_string is not constexpr.
* StaticStr<N> exists specifically for compile-time dot-join composition.
*
* Naming conventions follow OpenTelemetry semantic conventions:
* - Attribute keys: "xrpl.<subsystem>.<field>"
* - Span prefixes: "<subsystem>[.<component>]"
* Naming conventions (see spec 2026-05-13-span-attr-naming-design):
* - Per-span attribute keys: bare field name (span name carries the domain).
* - Collision qualifier: <domain>_<field> when bare name collides across
* domains or with OTel reserved `status` (e.g. rpc_status, grpc_status).
* - Resource attribute keys: xrpl.<subsystem>.<field> (process-identity).
* - Span prefixes: <subsystem>[.<component>].
*/
#include <cstddef>
@@ -98,14 +101,28 @@ inline constexpr auto link = makeStr("link");
namespace attr {
inline constexpr auto networkId = join(join(seg::xrpl, seg::network), makeStr("id"));
inline constexpr auto networkType = join(join(seg::xrpl, seg::network), makeStr("type"));
inline constexpr auto linkType = join(join(seg::xrpl, seg::link), makeStr("type"));
inline constexpr auto linkType = makeStr("link_type");
/// Node health attributes (cross-cutting, used by RPC/consensus/tx spans).
/// Node health attributes — RESOURCE-ONLY (process identity, not per-span).
/// Set at Tracer init via resource::Resource::Create and refreshed on state
/// transitions. Do NOT use with span.setAttribute().
inline constexpr auto xrplNode = join(seg::xrpl, makeStr("node"));
/// "xrpl.node.amendment_blocked"
/// "xrpl.node.amendment_blocked" — resource attribute key.
inline constexpr auto nodeAmendmentBlocked = join(xrplNode, makeStr("amendment_blocked"));
/// "xrpl.node.server_state"
/// "xrpl.node.server_state" — resource attribute key.
inline constexpr auto nodeServerState = join(xrplNode, makeStr("server_state"));
/// Canonical shared attrs (rule 5 — kept xrpl.<domain>.* form).
/// Defined once here, aliased by domain-specific headers.
inline constexpr auto txHash = join(join(seg::xrpl, seg::tx), makeStr("hash"));
inline constexpr auto peerId = join(join(seg::xrpl, seg::peer), makeStr("id"));
inline constexpr auto ledgerSeq = join(join(seg::xrpl, seg::ledger), makeStr("seq"));
/// Shared close-time attrs — bare names, reused by consensus and ledger.
inline constexpr auto closeTime = makeStr("close_time");
inline constexpr auto closeTimeCorrect = makeStr("close_time_correct");
inline constexpr auto closeResolutionMs = makeStr("close_resolution_ms");
/// "xrpl.ledger.hash" — qualified shared key (xrpl.<domain>.* form), not one
/// of the bare close-time attrs declared just above.
inline constexpr auto ledgerHash = join(join(seg::xrpl, seg::ledger), makeStr("hash"));
} // namespace attr
// ===== Shared attribute values =============================================

View File

@@ -166,7 +166,7 @@ private:
std::string m_name;
GaugeImpl::value_type m_last_value{0};
GaugeImpl::value_type m_value{0};
bool m_dirty{false};
bool m_dirty{true};
};
//------------------------------------------------------------------------------
@@ -583,6 +583,9 @@ StatsDEventImpl::do_notify(EventImpl::value_type const& value)
/// Constructs a StatsD gauge named `name` and registers it with the collector
/// implementation `impl` via add().
StatsDGaugeImpl::StatsDGaugeImpl(std::string name, std::shared_ptr<StatsDCollectorImp> const& impl)
: m_impl(impl), m_name(std::move(name))
{
// The gauge is meant to start dirty so the initial value (0) is emitted on
// the first flush. Without this, gauges whose value never changes from 0
// would never appear in downstream metric stores (e.g. Prometheus via StatsD).
// NOTE(review): nothing in this body sets the dirty flag — presumably it
// relies on the m_dirty{true} default member initializer; confirm.
m_impl->add(*this);
}

View File

@@ -569,7 +569,8 @@ RCLConsensus::Adaptor::doAccept(
static_cast<int64_t>(
std::chrono::duration_cast<std::chrono::milliseconds>(closeResolution).count()));
doAcceptSpan.setAttribute(
telemetry::cons_span::attr::state, std::string(consensusFail ? "moved_on" : "finished"));
telemetry::cons_span::attr::consensusState,
std::string(consensusFail ? "moved_on" : "finished"));
doAcceptSpan.setAttribute(telemetry::cons_span::attr::proposing, proposing);
doAcceptSpan.setAttribute(
telemetry::cons_span::attr::roundTimeMs,

View File

@@ -82,7 +82,7 @@ buildLedgerImpl(
built->header().seq < XRP_LEDGER_EARLIEST_FEES || built->read(keylet::fees()),
"xrpl::buildLedgerImpl : valid ledger fees");
built->setAccepted(closeTime, closeResolution, closeTimeCorrect);
buildSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(built->header().seq));
buildSpan.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(built->header().seq));
buildSpan.setAttribute(
ledger_span::attr::closeTime, static_cast<int64_t>(closeTime.time_since_epoch().count()));
buildSpan.setAttribute(ledger_span::attr::closeTimeCorrect, closeTimeCorrect);

View File

@@ -460,7 +460,7 @@ LedgerMaster::storeLedger(std::shared_ptr<Ledger const> ledger)
{
using namespace telemetry;
auto span = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::store);
span.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
span.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(ledger->header().seq));
bool const validated = ledger->header().validated;
// Returns true if we already had the ledger
@@ -980,7 +980,7 @@ LedgerMaster::checkAccept(std::shared_ptr<Ledger const> const& ledger)
using namespace telemetry;
auto valSpan = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::validate);
valSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
valSpan.setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(ledger->header().seq));
valSpan.setAttribute(ledger_span::attr::validations, static_cast<int64_t>(tvc));
JLOG(m_journal.info()) << "Advancing accepted ledger to " << ledger->header().seq

View File

@@ -29,22 +29,17 @@ inline constexpr auto apply = makeStr("apply");
// ===== Attribute keys ========================================================
namespace attr {
inline constexpr auto xrplLedger = join(seg::xrpl, seg::ledger);
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::closeResolutionMs;
using ::xrpl::telemetry::attr::closeTime;
using ::xrpl::telemetry::attr::closeTimeCorrect;
using ::xrpl::telemetry::attr::ledgerHash;
using ::xrpl::telemetry::attr::ledgerSeq;
/// "xrpl.ledger.seq"
inline constexpr auto seq = join(xrplLedger, makeStr("seq"));
/// "xrpl.ledger.close_time"
inline constexpr auto closeTime = join(xrplLedger, makeStr("close_time"));
/// "xrpl.ledger.close_time_correct"
inline constexpr auto closeTimeCorrect = join(xrplLedger, makeStr("close_time_correct"));
/// "xrpl.ledger.close_resolution_ms"
inline constexpr auto closeResolutionMs = join(xrplLedger, makeStr("close_resolution_ms"));
/// "xrpl.ledger.tx_count"
inline constexpr auto txCount = join(xrplLedger, makeStr("tx_count"));
/// "xrpl.ledger.tx_failed"
inline constexpr auto txFailed = join(xrplLedger, makeStr("tx_failed"));
/// "xrpl.ledger.validations"
inline constexpr auto validations = join(xrplLedger, makeStr("validations"));
/// Domain-owned bare attrs.
inline constexpr auto txCount = makeStr("tx_count");
inline constexpr auto txFailed = makeStr("tx_failed");
inline constexpr auto validations = makeStr("validations");
} // namespace attr
} // namespace xrpl::telemetry::ledger_span

View File

@@ -150,6 +150,7 @@ private:
beast::Journal m_journal;
beast::io_latency_probe<std::chrono::steady_clock> m_probe;
std::atomic<std::chrono::milliseconds> lastSample_;
std::atomic<bool> firstSample_;
public:
io_latency_sampler(
@@ -157,7 +158,7 @@ private:
beast::Journal journal,
std::chrono::milliseconds interval,
boost::asio::io_context& ios)
: m_event(std::move(ev)), m_journal(journal), m_probe(interval, ios)
: m_event(std::move(ev)), m_journal(journal), m_probe(interval, ios), firstSample_(true)
{
}
@@ -176,7 +177,10 @@ private:
lastSample_ = lastSample;
if (lastSample >= 10ms)
// Always emit the first sample so the metric is registered in
// downstream stores (Prometheus via StatsD). After that, only
// report latency >= 10 ms to avoid flooding with sub-ms values.
if (firstSample_.exchange(false) || lastSample >= 10ms)
m_event.notify(lastSample);
if (lastSample >= 500ms)
{

View File

@@ -11,7 +11,7 @@
* +-------------------------------------------------------+
* | grpc.request |
* | CallData::process(coro) |
* | attrs: method, role, status |
* | attrs: method, grpc_role, grpc_status |
* +-------------------------------------------------------+
*
* Unlike the HTTP/WS RPC path, gRPC has a flat single-span structure
@@ -38,14 +38,12 @@ inline constexpr auto request = makeStr("request");
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplGrpc = join(seg::xrpl, makeStr("grpc"));
/// "xrpl.grpc.method"
inline constexpr auto method = join(xrplGrpc, makeStr("method"));
/// "xrpl.grpc.role"
inline constexpr auto role = join(xrplGrpc, makeStr("role"));
/// "xrpl.grpc.status"
inline constexpr auto status = join(xrplGrpc, makeStr("status"));
/// "method" — gRPC method name (e.g. GetLedger).
inline constexpr auto method = makeStr("method");
/// "grpc_role" — domain-qualified: a bare "role" key would be ambiguous with
/// the RPC domain's role attribute (rpc_role).
inline constexpr auto grpcRole = makeStr("grpc_role");
/// "grpc_status" — Domain-qualified: avoids OTel reserved span status.
inline constexpr auto grpcStatus = makeStr("grpc_status");
} // namespace attr
// ===== Attribute values ====================================================

View File

@@ -1326,7 +1326,7 @@ NetworkOPsImp::processTransaction(
{
using namespace telemetry;
auto span = std::make_shared<SpanGuard>(txProcessSpan(transaction->getID()));
span->setAttribute(tx_span::attr::hash, to_string(transaction->getID()).c_str());
span->setAttribute(tx_span::attr::txHash, to_string(transaction->getID()).c_str());
span->setAttribute(tx_span::attr::local, bLocal);
auto ev = m_job_queue.makeLoadEvent(jtTXN_PROC, "ProcessTXN");

View File

@@ -41,25 +41,20 @@ inline constexpr auto process = join(prefix::tx, op::process);
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplTx = join(seg::xrpl, seg::tx);
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::peerId;
using ::xrpl::telemetry::attr::txHash;
/// "xrpl.tx.hash"
inline constexpr auto hash = join(xrplTx, makeStr("hash"));
/// "xrpl.tx.local"
inline constexpr auto local = join(xrplTx, makeStr("local"));
/// "xrpl.tx.path"
inline constexpr auto path = join(xrplTx, makeStr("path"));
/// "xrpl.tx.suppressed"
inline constexpr auto suppressed = join(xrplTx, makeStr("suppressed"));
/// "xrpl.tx.status"
inline constexpr auto status = join(xrplTx, makeStr("status"));
inline constexpr auto xrplPeer = join(seg::xrpl, seg::peer);
/// "xrpl.peer.id"
inline constexpr auto peerId = join(xrplPeer, makeStr("id"));
/// "xrpl.peer.version"
inline constexpr auto peerVersion = join(xrplPeer, makeStr("version"));
/// "local" — whether tx originated locally.
inline constexpr auto local = makeStr("local");
/// "path" — sync or async processing path.
inline constexpr auto path = makeStr("path");
/// "suppressed" — whether tx was suppressed as duplicate.
inline constexpr auto suppressed = makeStr("suppressed");
/// "tx_status" — domain-qualified: a bare "status" key would collide with the
/// status attrs of other domains (rpc_status, txq_status).
inline constexpr auto txStatus = makeStr("tx_status");
/// "peer_version" — version of peer that sent the tx.
inline constexpr auto peerVersion = makeStr("peer_version");
} // namespace attr
// ===== Attribute values ====================================================

View File

@@ -71,30 +71,28 @@ inline constexpr auto cleanup = makeStr("cleanup");
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplTxq = join(seg::xrpl, makeStr("txq"));
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::ledgerSeq;
using ::xrpl::telemetry::attr::txHash;
/// "xrpl.txq.tx_hash"
inline constexpr auto txHash = join(xrplTxq, makeStr("tx_hash"));
/// "xrpl.txq.status"
inline constexpr auto status = join(xrplTxq, makeStr("status"));
/// "xrpl.txq.fee_level_paid"
inline constexpr auto feeLevelPaid = join(xrplTxq, makeStr("fee_level_paid"));
/// "xrpl.txq.required_fee_level"
inline constexpr auto requiredFeeLevel = join(xrplTxq, makeStr("required_fee_level"));
/// "xrpl.txq.queue_size"
inline constexpr auto queueSize = join(xrplTxq, makeStr("queue_size"));
/// "xrpl.txq.ledger_changed"
inline constexpr auto ledgerChanged = join(xrplTxq, makeStr("ledger_changed"));
/// "xrpl.txq.ledger_seq"
inline constexpr auto ledgerSeq = join(xrplTxq, makeStr("ledger_seq"));
/// "xrpl.txq.expired_count"
inline constexpr auto expiredCount = join(xrplTxq, makeStr("expired_count"));
/// "xrpl.txq.ter_code"
inline constexpr auto terCode = join(xrplTxq, makeStr("ter_code"));
/// "xrpl.txq.retries_remaining"
inline constexpr auto retriesRemaining = join(xrplTxq, makeStr("retries_remaining"));
/// "xrpl.txq.num_cleared"
inline constexpr auto numCleared = join(xrplTxq, makeStr("num_cleared"));
/// "txq_status" — domain-qualified: a bare "status" key would collide with the
/// status attrs of other domains (tx_status, rpc_status).
inline constexpr auto txqStatus = makeStr("txq_status");
/// "fee_level_paid" — fee level paid by queued tx.
inline constexpr auto feeLevelPaid = makeStr("fee_level_paid");
/// "required_fee_level" — minimum fee level for inclusion.
inline constexpr auto requiredFeeLevel = makeStr("required_fee_level");
/// "queue_size" — current TxQ depth.
inline constexpr auto queueSize = makeStr("queue_size");
/// "ledger_changed" — whether ledger changed since last attempt.
inline constexpr auto ledgerChanged = makeStr("ledger_changed");
/// "expired_count" — number of expired entries cleared.
inline constexpr auto expiredCount = makeStr("expired_count");
/// "ter_code" — transaction engine result code.
inline constexpr auto terCode = makeStr("ter_code");
/// "retries_remaining" — retries left before discard.
inline constexpr auto retriesRemaining = makeStr("retries_remaining");
/// "num_cleared" — entries cleared in batch.
inline constexpr auto numCleared = makeStr("num_cleared");
} // namespace attr
// ===== Attribute values ====================================================

View File

@@ -1812,7 +1812,7 @@ Consensus<Adaptor>::haveConsensus(std::unique_ptr<std::stringstream> const& clog
{
stateStr = "expired";
}
span.setAttribute(cons_span::attr::result, stateStr);
span.setAttribute(cons_span::attr::consensusResult, stateStr);
CLOG(clog) << "Consensus has been reached. ";
// NOLINTEND(bugprone-unchecked-optional-access)

View File

@@ -124,96 +124,51 @@ inline constexpr auto phaseOpen = join(seg::consensus, op::phaseOpen);
// ===== Attribute keys ========================================================
namespace attr {
inline constexpr auto xrplConsensus = join(seg::xrpl, seg::consensus);
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::closeResolutionMs;
using ::xrpl::telemetry::attr::closeTime;
using ::xrpl::telemetry::attr::closeTimeCorrect;
using ::xrpl::telemetry::attr::ledgerSeq;
/// "xrpl.consensus.ledger_id"
inline constexpr auto ledgerId = join(xrplConsensus, makeStr("ledger_id"));
/// "xrpl.consensus.ledger.seq"
inline constexpr auto ledgerSeq = join(join(xrplConsensus, makeStr("ledger")), makeStr("seq"));
/// "xrpl.consensus.mode"
inline constexpr auto mode = join(xrplConsensus, makeStr("mode"));
/// "xrpl.consensus.round"
inline constexpr auto round = join(xrplConsensus, makeStr("round"));
/// "xrpl.consensus.proposers"
inline constexpr auto proposers = join(xrplConsensus, makeStr("proposers"));
/// "xrpl.consensus.round_time_ms"
inline constexpr auto roundTimeMs = join(xrplConsensus, makeStr("round_time_ms"));
/// "xrpl.consensus.proposing"
inline constexpr auto proposing = join(xrplConsensus, makeStr("proposing"));
/// "xrpl.consensus.state"
inline constexpr auto state = join(xrplConsensus, makeStr("state"));
/// Kept qualified (rule 5 — bare name ambiguous across domains).
inline constexpr auto ledgerId = join(join(seg::xrpl, seg::consensus), makeStr("ledger_id"));
inline constexpr auto mode = join(join(seg::xrpl, seg::consensus), makeStr("mode"));
inline constexpr auto round = join(join(seg::xrpl, seg::consensus), makeStr("round"));
inline constexpr auto roundId = join(join(seg::xrpl, seg::consensus), makeStr("round_id"));
// Close time attributes
/// "xrpl.consensus.close_time"
inline constexpr auto closeTime = join(xrplConsensus, makeStr("close_time"));
/// "xrpl.consensus.close_time_correct"
inline constexpr auto closeTimeCorrect = join(xrplConsensus, makeStr("close_time_correct"));
/// "xrpl.consensus.close_resolution_ms"
inline constexpr auto closeResolutionMs = join(xrplConsensus, makeStr("close_resolution_ms"));
/// "xrpl.consensus.parent_close_time"
inline constexpr auto parentCloseTime = join(xrplConsensus, makeStr("parent_close_time"));
/// "xrpl.consensus.close_time_self"
inline constexpr auto closeTimeSelf = join(xrplConsensus, makeStr("close_time_self"));
/// "xrpl.consensus.close_time_vote_bins"
inline constexpr auto closeTimeVoteBins = join(xrplConsensus, makeStr("close_time_vote_bins"));
/// "xrpl.consensus.resolution_direction"
inline constexpr auto resolutionDirection = join(xrplConsensus, makeStr("resolution_direction"));
/// Domain-owned bare attrs.
inline constexpr auto proposers = makeStr("proposers");
inline constexpr auto roundTimeMs = makeStr("round_time_ms");
inline constexpr auto proposing = makeStr("proposing");
/// "consensus_state" — domain-qualified (collides with other domains' state).
inline constexpr auto consensusState = makeStr("consensus_state");
inline constexpr auto parentCloseTime = makeStr("parent_close_time");
inline constexpr auto closeTimeSelf = makeStr("close_time_self");
inline constexpr auto closeTimeVoteBins = makeStr("close_time_vote_bins");
inline constexpr auto resolutionDirection = makeStr("resolution_direction");
inline constexpr auto convergePercent = makeStr("converge_percent");
inline constexpr auto establishCount = makeStr("establish_count");
inline constexpr auto avalancheThreshold = makeStr("avalanche_threshold");
inline constexpr auto closeTimeThreshold = makeStr("close_time_threshold");
inline constexpr auto haveCloseTimeConsensus = makeStr("have_close_time_consensus");
inline constexpr auto agreeCount = makeStr("agree_count");
inline constexpr auto disagreeCount = makeStr("disagree_count");
inline constexpr auto thresholdPercent = makeStr("threshold_percent");
/// "consensus_result" — domain-qualified (collides with generic result).
inline constexpr auto consensusResult = makeStr("consensus_result");
inline constexpr auto quorum = makeStr("quorum");
inline constexpr auto traceStrategy = makeStr("trace_strategy");
inline constexpr auto modeOld = makeStr("mode_old");
inline constexpr auto modeNew = makeStr("mode_new");
// Establish/convergence attributes
/// "xrpl.consensus.converge_percent"
inline constexpr auto convergePercent = join(xrplConsensus, makeStr("converge_percent"));
/// "xrpl.consensus.establish_count"
inline constexpr auto establishCount = join(xrplConsensus, makeStr("establish_count"));
// Avalanche threshold attributes
/// "xrpl.consensus.avalanche_threshold"
inline constexpr auto avalancheThreshold = join(xrplConsensus, makeStr("avalanche_threshold"));
/// "xrpl.consensus.close_time_threshold"
inline constexpr auto closeTimeThreshold = join(xrplConsensus, makeStr("close_time_threshold"));
/// "xrpl.consensus.have_close_time_consensus"
inline constexpr auto haveCloseTimeConsensus =
join(xrplConsensus, makeStr("have_close_time_consensus"));
// Consensus check attributes
/// "xrpl.consensus.agree_count"
inline constexpr auto agreeCount = join(xrplConsensus, makeStr("agree_count"));
/// "xrpl.consensus.disagree_count"
inline constexpr auto disagreeCount = join(xrplConsensus, makeStr("disagree_count"));
/// "xrpl.consensus.threshold_percent"
inline constexpr auto thresholdPercent = join(xrplConsensus, makeStr("threshold_percent"));
/// "xrpl.consensus.result"
inline constexpr auto result = join(xrplConsensus, makeStr("result"));
/// "xrpl.consensus.quorum"
inline constexpr auto quorum = join(xrplConsensus, makeStr("quorum"));
// Trace strategy attribute
/// "xrpl.consensus.trace_strategy"
inline constexpr auto traceStrategy = join(xrplConsensus, makeStr("trace_strategy"));
/// "xrpl.consensus.round_id"
inline constexpr auto roundId = join(xrplConsensus, makeStr("round_id"));
// Mode change attributes
/// "xrpl.consensus.mode.old"
inline constexpr auto modeOld = join(join(xrplConsensus, makeStr("mode")), makeStr("old"));
/// "xrpl.consensus.mode.new"
inline constexpr auto modeNew = join(join(xrplConsensus, makeStr("mode")), makeStr("new"));
// Dispute event attributes
/// "xrpl.tx.id"
/// Transaction/dispute attrs used in consensus accept spans.
inline constexpr auto txId = join(join(seg::xrpl, seg::tx), makeStr("id"));
/// "xrpl.dispute.our_vote"
inline constexpr auto disputeOurVote =
join(join(seg::xrpl, makeStr("dispute")), makeStr("our_vote"));
/// "xrpl.dispute.yays"
inline constexpr auto disputeYays = join(join(seg::xrpl, makeStr("dispute")), makeStr("yays"));
/// "xrpl.dispute.nays"
inline constexpr auto disputeNays = join(join(seg::xrpl, makeStr("dispute")), makeStr("nays"));
/// "xrpl.consensus.tx_count"
inline constexpr auto txCount = join(xrplConsensus, makeStr("tx_count"));
/// "xrpl.consensus.disputes_count"
inline constexpr auto disputesCount = join(xrplConsensus, makeStr("disputes_count"));
/// "xrpl.consensus.trusted"
inline constexpr auto trusted = join(xrplConsensus, makeStr("trusted"));
inline constexpr auto disputeOurVote = makeStr("dispute_our_vote");
inline constexpr auto disputeYays = makeStr("dispute_yays");
inline constexpr auto disputeNays = makeStr("dispute_nays");
inline constexpr auto txCount = makeStr("tx_count");
inline constexpr auto disputesCount = makeStr("disputes_count");
inline constexpr auto trusted = makeStr("trusted");
} // namespace attr
// ===== Event names ===========================================================

View File

@@ -1447,10 +1447,15 @@ PeerImp::handleTransaction(
using namespace telemetry;
auto span = std::make_shared<SpanGuard>(txReceiveSpan(txID, *m));
span->setAttribute(tx_span::attr::hash, to_string(txID).c_str());
span->setAttribute(tx_span::attr::txHash, to_string(txID).c_str());
span->setAttribute(tx_span::attr::peerId, static_cast<int64_t>(id_));
if (auto const version = getVersion(); !version.empty())
span->setAttribute(tx_span::attr::peerVersion, version.c_str());
// Set defaults for conditional attributes so they are always present
// on the span. The suppressed path overrides these when the
// transaction has already been seen via HashRouter.
span->setAttribute(tx_span::attr::suppressed, false);
span->setAttribute(tx_span::attr::txStatus, "new");
// Charge strongly for attempting to relay a txn with tfInnerBatchTxn
// LCOV_EXCL_START
@@ -1488,7 +1493,7 @@ PeerImp::handleTransaction(
// we have seen this transaction recently
if (any(flags & HashRouterFlags::BAD))
{
span->setAttribute(tx_span::attr::status, tx_span::val::knownBad);
span->setAttribute(tx_span::attr::txStatus, tx_span::val::knownBad);
fee_.update(Resource::feeUselessData, "known bad");
JLOG(p_journal_.debug()) << "Ignoring known bad tx " << txID;
}
@@ -1870,7 +1875,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
{
using namespace telemetry;
auto span = SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::proposalReceive);
span.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
span.setAttribute(peer_span::attr::peerId, static_cast<int64_t>(id_));
protocol::TMProposeSet const& set = *m;
@@ -2479,7 +2484,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
using namespace telemetry;
auto valSpan =
SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::validationReceive);
valSpan.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
valSpan.setAttribute(peer_span::attr::peerId, static_cast<int64_t>(id_));
if (m->validation().size() < 50)
{
@@ -2503,8 +2508,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
false);
val->setSeen(closeTime);
}
valSpan.setAttribute(
peer_span::attr::validationLedgerHash, to_string(val->getLedgerHash()).c_str());
valSpan.setAttribute(peer_span::attr::ledgerHash, to_string(val->getLedgerHash()).c_str());
valSpan.setAttribute(peer_span::attr::validationFull, val->isFull());
if (!isCurrent(

View File

@@ -25,22 +25,14 @@ inline constexpr auto validationReceive = makeStr("validation.receive");
// ===== Attribute keys ========================================================
namespace attr {
inline constexpr auto xrplPeer = join(seg::xrpl, seg::peer);
/// Canonical shared constants (defined in SpanNames.h).
using ::xrpl::telemetry::attr::ledgerHash;
using ::xrpl::telemetry::attr::peerId;
/// "xrpl.peer.id"
inline constexpr auto id = join(xrplPeer, makeStr("id"));
/// "xrpl.peer.proposal.trusted"
inline constexpr auto proposalTrusted =
join(join(xrplPeer, makeStr("proposal")), makeStr("trusted"));
/// "xrpl.peer.validation.ledger_hash"
inline constexpr auto validationLedgerHash =
join(join(xrplPeer, makeStr("validation")), makeStr("ledger_hash"));
/// "xrpl.peer.validation.full"
inline constexpr auto validationFull = join(join(xrplPeer, makeStr("validation")), makeStr("full"));
/// "xrpl.peer.validation.trusted"
inline constexpr auto validationTrusted =
join(join(xrplPeer, makeStr("validation")), makeStr("trusted"));
/// Domain-owned bare attrs.
inline constexpr auto proposalTrusted = makeStr("proposal_trusted");
inline constexpr auto validationFull = makeStr("validation_full");
inline constexpr auto validationTrusted = makeStr("validation_trusted");
} // namespace attr
} // namespace xrpl::telemetry::peer_span

View File

@@ -63,24 +63,24 @@ inline constexpr auto rank = makeStr("rank");
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplPathfind = join(seg::xrpl, makeStr("pathfind"));
/// "xrpl.pathfind.source_account"
inline constexpr auto sourceAccount = join(xrplPathfind, makeStr("source_account"));
/// "xrpl.pathfind.dest_account"
inline constexpr auto destAccount = join(xrplPathfind, makeStr("dest_account"));
/// "xrpl.pathfind.fast"
inline constexpr auto fast = join(xrplPathfind, makeStr("fast"));
/// "xrpl.pathfind.search_level"
inline constexpr auto searchLevel = join(xrplPathfind, makeStr("search_level"));
/// "xrpl.pathfind.num_complete_paths"
inline constexpr auto numCompletePaths = join(xrplPathfind, makeStr("num_complete_paths"));
/// "xrpl.pathfind.num_paths"
inline constexpr auto numPaths = join(xrplPathfind, makeStr("num_paths"));
/// "xrpl.pathfind.num_requests"
inline constexpr auto numRequests = join(xrplPathfind, makeStr("num_requests"));
/// "xrpl.pathfind.ledger_index"
inline constexpr auto ledgerIndex = join(xrplPathfind, makeStr("ledger_index"));
/// "source_account" — originating account for path search.
inline constexpr auto sourceAccount = makeStr("source_account");
/// "dest_account" — destination account.
inline constexpr auto destAccount = makeStr("dest_account");
/// "fast" — whether fast pathfinding mode enabled.
inline constexpr auto fast = makeStr("fast");
/// "search_level" — depth of graph exploration.
inline constexpr auto searchLevel = makeStr("search_level");
/// "num_complete_paths" — complete paths found.
inline constexpr auto numCompletePaths = makeStr("num_complete_paths");
/// "num_paths" — total paths returned.
inline constexpr auto numPaths = makeStr("num_paths");
/// "num_requests" — active path requests.
inline constexpr auto numRequests = makeStr("num_requests");
/// "xrpl.pathfind.ledger_index" — kept qualified (rule 5): pathfind target
/// ledger is distinct from xrpl.ledger.seq.
inline constexpr auto ledgerIndex =
join(join(seg::xrpl, makeStr("pathfind")), makeStr("ledger_index"));
} // namespace attr
} // namespace xrpl::telemetry::pathfind_span

View File

@@ -168,11 +168,9 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
span.setAttribute(rpc_span::attr::command, name.c_str());
span.setAttribute(rpc_span::attr::version, static_cast<int64_t>(context.apiVersion));
span.setAttribute(
rpc_span::attr::role,
rpc_span::attr::rpcRole,
context.role == Role::ADMIN ? std::string_view(rpc_span::val::admin)
: std::string_view(rpc_span::val::user));
span.setAttribute(attr::nodeAmendmentBlocked, context.app.getOPs().isAmendmentBlocked());
span.setAttribute(attr::nodeServerState, context.app.getOPs().strOperatingMode());
static std::atomic<std::uint64_t> requestId{0};
auto& perfLog = context.app.getPerfLog();
@@ -189,7 +187,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
JLOG(context.j.debug()) << "RPC call " << name << " completed in "
<< ((end - start).count() / 1000000000.0) << "seconds";
perfLog.rpcFinish(name, curId);
span.setAttribute(rpc_span::attr::status, rpc_span::val::success);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
return ret;
}
catch (std::exception& e)
@@ -197,7 +195,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
perfLog.rpcError(name, curId);
JLOG(context.j.info()) << "Caught throw: " << e.what();
span.recordException(e);
span.setAttribute(rpc_span::attr::status, rpc_span::val::error);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error);
if (context.loadType == Resource::feeReferenceRPC)
context.loadType = Resource::feeExceptionRPC;

View File

@@ -14,8 +14,99 @@
* auto span = SpanGuard::span(
* TraceCategory::Rpc, rpc_span::prefix::command, "submit");
* span.setAttribute(rpc_span::attr::command, "submit");
* span.setAttribute(rpc_span::attr::status, rpc_span::val::success);
* span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
* @endcode
*
* Span hierarchy (automatic nesting via OTel thread-local context):
*
* HTTP JSON-RPC path (single request):
*
* +-------------------------------------------------------+
* | rpc.http_request |
* | ServerHandler::processSession(Session) |
* | |
* | +--------------------------------------------------+ |
* | | rpc.process | |
* | | ServerHandler::processRequest() | |
* | | | |
* | | +---------------------------------------------+ | |
* | | | rpc.command.{name} | | |
* | | | RPC::callMethod() | | |
* | | | attrs: command, version, rpc_role, rpc_status | | |
* | | +---------------------------------------------+ | |
* | +--------------------------------------------------+ |
* +-------------------------------------------------------+
*
* HTTP batch path (multiple commands per request):
*
* +-------------------------------------------------------+
* | rpc.http_request |
* | |
* | +--------------------------------------------------+ |
* | | rpc.process | |
* | | | |
* | | +------------------+ +------------------+ | |
* | | | rpc.command.{a} | | rpc.command.{b} | ... | |
* | | +------------------+ +------------------+ | |
* | +--------------------------------------------------+ |
* +-------------------------------------------------------+
*
* WebSocket path:
*
* +-------------------------------------------------------+
* | rpc.ws_message |
* | ServerHandler::processSession(WSSession) |
* | |
* | +--------------------------------------------------+ |
* | | rpc.command.{name} | |
* | | RPC::callMethod() | |
* | | attrs: command, version, rpc_role, rpc_status | |
* | +--------------------------------------------------+ |
* +-------------------------------------------------------+
*
* WebSocket error paths:
*
* +-------------------------------------------------------+
* | rpc.ws_message (error: invalid_json) |
* | ServerHandler::onWSMessage() — parse failure |
* +-------------------------------------------------------+
*
* +-------------------------------------------------------+
* | rpc.ws_upgrade |
* | ServerHandler::onHandoff() — upgrade try/catch |
* +-------------------------------------------------------+
*
* Command dispatch error path:
*
* +-------------------------------------------------------+
* | rpc.command.{name} (error: too_busy/unknown/etc) |
* | RPC::doCommand() — fillHandler() rejection |
* +-------------------------------------------------------+
*
* gRPC path (see GrpcSpanNames.h for constants):
*
* +-------------------------------------------------------+
* | grpc.request |
* | CallData::process(coro) |
* | attrs: method, grpc_status |
* +-------------------------------------------------------+
*
* Covered paths:
* - HTTP JSON-RPC (single and batch requests)
* - WebSocket RPC commands
* - WebSocket message parse errors (invalid JSON, oversized)
* - WebSocket upgrade failures (protocol handshake errors)
* - Admin CLI (connects via HTTP internally)
* - Command dispatch rejections (unknown cmd, too busy, no perm)
* - gRPC endpoints (GetLedger, GetLedgerData, GetLedgerDiff,
* GetLedgerEntry)
* - Command execution: timing, success/failure, exceptions
* - Per-command attributes: name, API version, rpc_role, rpc_status
*
* Known gaps (not yet instrumented):
* - Early validation errors in processRequest() before rpc.process
* span (malformed JSON, auth failures, oversized requests)
* - Subscription push notifications (server-initiated, not RPC)
*/
#include <xrpl/telemetry/SpanNames.h>
@@ -43,18 +134,16 @@ inline constexpr auto process = makeStr("process");
// ===== Attribute keys ======================================================
namespace attr {
inline constexpr auto xrplRpc = join(seg::xrpl, seg::rpc);
/// "xrpl.rpc.command"
inline constexpr auto command = join(xrplRpc, makeStr("command"));
/// "xrpl.rpc.version"
inline constexpr auto version = join(xrplRpc, makeStr("version"));
/// "xrpl.rpc.role"
inline constexpr auto role = join(xrplRpc, makeStr("role"));
/// "xrpl.rpc.status"
inline constexpr auto status = join(xrplRpc, makeStr("status"));
/// "xrpl.rpc.payload_size"
inline constexpr auto payloadSize = join(xrplRpc, makeStr("payload_size"));
/// "command" — RPC method name.
inline constexpr auto command = makeStr("command");
/// "version" — api_version per request.
inline constexpr auto version = makeStr("version");
/// "rpc_role" — admin|user. Domain-qualified: the bare name "role" is ambiguous across domains (cf. grpc_role).
inline constexpr auto rpcRole = makeStr("rpc_role");
/// "rpc_status" — success|error. Domain-qualified: avoids OTel reserved span status.
inline constexpr auto rpcStatus = makeStr("rpc_status");
/// "request_payload_size" — bytes of inbound request payload.
inline constexpr auto requestPayloadSize = makeStr("request_payload_size");
} // namespace attr
// ===== Attribute values ====================================================

View File

@@ -513,7 +513,7 @@ ServerHandler::processSession(
JLOG(m_journal.error()) << "Exception while processing WS: " << ex.what() << "\n"
<< "Input JSON: " << Json::Compact{Json::Value{jv}};
span.recordException(ex);
span.setAttribute(rpc_span::attr::status, rpc_span::val::error);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error);
// LCOV_EXCL_STOP
}
@@ -904,7 +904,7 @@ ServerHandler::processRequest(
<< "Internal error : " << ex.what()
<< " when processing request: " << Json::Compact{Json::Value{params}};
span.recordException(ex);
span.setAttribute(rpc_span::attr::status, rpc_span::val::error);
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::error);
// LCOV_EXCL_STOP
}