diff --git a/docker/telemetry/grafana/dashboards/consensus-health.json b/docker/telemetry/grafana/dashboards/consensus-health.json index db0381b9ec..f0787a5390 100644 --- a/docker/telemetry/grafana/dashboards/consensus-health.json +++ b/docker/telemetry/grafana/dashboards/consensus-health.json @@ -95,7 +95,7 @@ }, { "title": "Ledger Close Duration", - "description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp) measures the time from when consensus triggers a ledger close to completion. Carries xrpl.ledger.seq and xrpl.consensus.mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.", + "description": "p95 duration of the ledger close event. The consensus.ledger_close span (RCLConsensus.cpp) measures the time from when consensus triggers a ledger close to completion. Carries ledger_seq and consensus_mode attributes. Compare with Consensus Round Duration to understand how close timing relates to overall round time.", "type": "timeseries", "gridPos": { "h": 8, @@ -134,7 +134,7 @@ }, { "title": "Validation Send Rate", - "description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp). Each validation confirms the node has fully validated a ledger. The span carries xrpl.ledger.seq and proposing. Should closely track the ledger close rate when the node is healthy.", + "description": "Rate at which this node sends ledger validations to the network. Sourced from the consensus.validation.send span (RCLConsensus.cpp). Each validation confirms the node has fully validated a ledger. The span carries ledger_seq and proposing. Should closely track the ledger close rate when the node is healthy.", "type": "stat", "gridPos": { "h": 8, @@ -239,7 +239,7 @@ }, { "title": "Consensus Mode Over Time", - "description": "Breakdown of consensus ledger close events by the node's consensus mode (Proposing, Observing, Wrong Ledger, Switched Ledger). Grouped by the xrpl.consensus.mode span attribute from consensus.ledger_close. A healthy validator should be predominantly in Proposing mode. Frequent Wrong Ledger or Switched Ledger indicates sync issues.", + "description": "Breakdown of consensus ledger close events by the node's consensus mode (Proposing, Observing, Wrong Ledger, Switched Ledger). Grouped by the consensus_mode span attribute from consensus.ledger_close. A healthy validator should be predominantly in Proposing mode. Frequent Wrong Ledger or Switched Ledger indicates sync issues.", "type": "timeseries", "gridPos": { "h": 8, diff --git a/docker/telemetry/grafana/dashboards/transaction-overview.json b/docker/telemetry/grafana/dashboards/transaction-overview.json index 02da67e9ea..3e699b8fdf 100644 --- a/docker/telemetry/grafana/dashboards/transaction-overview.json +++ b/docker/telemetry/grafana/dashboards/transaction-overview.json @@ -80,15 +80,15 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])))", - "legendFormat": "p95 {{tx_type}}" + "expr": "histogram_quantile(0.95, sum by (le, tx_type, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])))", + "legendFormat": "P95 {{tx_type}} [{{exported_instance}}]" }, { "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.50, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])))", - "legendFormat": "p50 {{tx_type}}" + "expr": "histogram_quantile(0.50, sum by (le, tx_type, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.process\", tx_type=~\"$tx_type\"}[5m])))", + "legendFormat": "P50 {{tx_type}} [{{exported_instance}}]" } ], "fieldConfig": { @@ -489,8 +489,8 @@ "datasource": { "type": "prometheus" }, - "expr": "histogram_quantile(0.95, sum by (le, tx_type) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.transactor\", tx_type=~\"$tx_type\"}[5m])))", - "legendFormat": "p95 {{tx_type}}" + "expr": "histogram_quantile(0.95, sum by (le, tx_type, exported_instance) (rate(traces_span_metrics_duration_milliseconds_bucket{exported_instance=~\"$node\", span_name=\"tx.transactor\", tx_type=~\"$tx_type\"}[5m])))", + "legendFormat": "P95 {{tx_type}} [{{exported_instance}}]" } ], "fieldConfig": { diff --git a/src/xrpld/app/ledger/LedgerHistory.cpp b/src/xrpld/app/ledger/LedgerHistory.cpp index 77c542fb16..092e88e28a 100644 --- a/src/xrpld/app/ledger/LedgerHistory.cpp +++ b/src/xrpld/app/ledger/LedgerHistory.cpp @@ -377,11 +377,15 @@ LedgerHistory::handleMismatch( JLOG(j_.error()) << "MISMATCH on consensus transaction set " << " built: " << to_string(*builtConsensusHash) << " validated: " << to_string(*validatedConsensusHash); + // The consensus tx-set hashes disagree — this is the root cause, + // so record it as the single reason and stop. The tx-level + // comparison below would otherwise double-count the same mismatch. recordReason("consensus_txset"); + return; } - else - JLOG(j_.error()) << "MISMATCH with same consensus transaction set: " - << to_string(*builtConsensusHash); + + JLOG(j_.error()) << "MISMATCH with same consensus transaction set: " + << to_string(*builtConsensusHash); } // Find differences between built and valid ledgers