mirror of
https://github.com/Xahau/xahaud.git
synced 2025-11-18 17:45:48 +00:00
Don't reach consensus as quickly if no other proposals are seen (#4763)
This fixes a case where a peer can desync under a particular timing circumstance: it reaches a certain point in consensus before it has received any proposals. This was noticed under high transaction volumes. Specifically, the problem occurs when we arrive at the point of deciding whether consensus has been reached after the minimum establish phase duration has elapsed but before any proposals have been received. This could be caused by finishing the previous round slightly faster and/or by some delay in receiving proposals. The existing behavior arrives at consensus immediately after the minimum establish duration when there are no proposals. This causes us to desync because we then close a non-validated ledger. The change in this PR causes us to wait for a configured threshold (ledgerMAX_CONSENSUS) before deciding to arrive at consensus with no proposals. This allows validators to catch up and allows brief delays in receiving proposals to be absorbed. There should be no drawback since, with no proposals coming in, we needn't be in a huge rush to jump ahead.
This commit is contained in:
@@ -87,11 +87,24 @@ checkConsensusReached(
|
||||
std::size_t agreeing,
|
||||
std::size_t total,
|
||||
bool count_self,
|
||||
std::size_t minConsensusPct)
|
||||
std::size_t minConsensusPct,
|
||||
bool reachedMax)
|
||||
{
|
||||
// If we are alone, we have a consensus
|
||||
// If we are alone for too long, we have consensus.
|
||||
// Delaying consensus like this avoids a circumstance where a peer
|
||||
// gets ahead of proposers insofar as it has not received any proposals.
|
||||
// This could happen if there's a slowdown in receiving proposals. Reaching
|
||||
// consensus prematurely in this way means that the peer will likely desync.
|
||||
// The check for reachedMax should allow plenty of time for proposals to
|
||||
// arrive, and there should be no downside. If a peer is truly not
|
||||
// receiving any proposals, then there should be no hurry. There's
|
||||
// really nowhere to go.
|
||||
if (total == 0)
|
||||
return true;
|
||||
{
|
||||
if (reachedMax)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (count_self)
|
||||
{
|
||||
@@ -120,7 +133,13 @@ checkConsensus(
|
||||
<< prevProposers << " agree=" << currentAgree
|
||||
<< " validated=" << currentFinished
|
||||
<< " time=" << currentAgreeTime.count() << "/"
|
||||
<< previousAgreeTime.count();
|
||||
<< previousAgreeTime.count() << " proposing? " << proposing
|
||||
<< " minimum duration to reach consensus: "
|
||||
<< parms.ledgerMIN_CONSENSUS.count() << "ms"
|
||||
<< " max consensus time "
|
||||
<< parms.ledgerMAX_CONSENSUS.count() << "s"
|
||||
<< " minimum consensus percentage: "
|
||||
<< parms.minCONSENSUS_PCT;
|
||||
|
||||
if (currentAgreeTime <= parms.ledgerMIN_CONSENSUS)
|
||||
return ConsensusState::No;
|
||||
@@ -139,7 +158,11 @@ checkConsensus(
|
||||
// Have we, together with the nodes on our UNL list, reached the threshold
|
||||
// to declare consensus?
|
||||
if (checkConsensusReached(
|
||||
currentAgree, currentProposers, proposing, parms.minCONSENSUS_PCT))
|
||||
currentAgree,
|
||||
currentProposers,
|
||||
proposing,
|
||||
parms.minCONSENSUS_PCT,
|
||||
currentAgreeTime > parms.ledgerMAX_CONSENSUS))
|
||||
{
|
||||
JLOG(j.debug()) << "normal consensus";
|
||||
return ConsensusState::Yes;
|
||||
@@ -148,7 +171,11 @@ checkConsensus(
|
||||
// Have sufficient nodes on our UNL list moved on and reached the threshold
|
||||
// to declare consensus?
|
||||
if (checkConsensusReached(
|
||||
currentFinished, currentProposers, false, parms.minCONSENSUS_PCT))
|
||||
currentFinished,
|
||||
currentProposers,
|
||||
false,
|
||||
parms.minCONSENSUS_PCT,
|
||||
currentAgreeTime > parms.ledgerMAX_CONSENSUS))
|
||||
{
|
||||
JLOG(j.warn()) << "We see no consensus, but 80% of nodes have moved on";
|
||||
return ConsensusState::MovedOn;
|
||||
|
||||
@@ -1155,7 +1155,7 @@ Consensus<Adaptor>::shouldPause() const
|
||||
std::size_t const offline = trustedKeys.size();
|
||||
|
||||
std::stringstream vars;
|
||||
vars << " (working seq: " << previousLedger_.seq() << ", "
|
||||
vars << " consensuslog (working seq: " << previousLedger_.seq() << ", "
|
||||
<< "validated seq: " << adaptor_.getValidLedgerIndex() << ", "
|
||||
<< "am validator: " << adaptor_.validator() << ", "
|
||||
<< "have validated: " << adaptor_.haveValidated() << ", "
|
||||
|
||||
@@ -86,7 +86,7 @@ struct ConsensusParms
|
||||
* validators don't appear to be offline that are merely waiting for
|
||||
* laggards.
|
||||
*/
|
||||
std::chrono::milliseconds ledgerMAX_CONSENSUS = std::chrono::seconds{10};
|
||||
std::chrono::milliseconds ledgerMAX_CONSENSUS = std::chrono::seconds{15};
|
||||
|
||||
//! Minimum number of seconds to wait to ensure others have computed the LCL
|
||||
std::chrono::milliseconds ledgerMIN_CLOSE = std::chrono::seconds{2};
|
||||
|
||||
@@ -109,10 +109,15 @@ public:
|
||||
ConsensusState::MovedOn ==
|
||||
checkConsensus(10, 2, 1, 8, 3s, 10s, p, true, journal_));
|
||||
|
||||
// No peers makes it easy to agree
|
||||
// If no peers, don't agree until time has passed.
|
||||
BEAST_EXPECT(
|
||||
ConsensusState::No ==
|
||||
checkConsensus(0, 0, 0, 0, 3s, 10s, p, true, journal_));
|
||||
|
||||
// Agree if no peers and enough time has passed.
|
||||
BEAST_EXPECT(
|
||||
ConsensusState::Yes ==
|
||||
checkConsensus(0, 0, 0, 0, 3s, 10s, p, true, journal_));
|
||||
checkConsensus(0, 0, 0, 0, 3s, 16s, p, true, journal_));
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user