Use LedgerTrie for preferred ledger (RIPD-1551):

These changes augment the Validations class with a LedgerTrie to better track the history of support for validated ledgers. This improves the selection of the preferred working ledger for consensus. The Validations class now tracks both full and partial validations. Partial validations are only used to determine the working ledger; full validations are required for any quorum-related function. Validators are also now explicitly restricted to sending validations with increasing ledger sequence numbers.
2025-12-06 17:27:52 +00:00 · 2017-12-07 11:00:42 -05:00
parent 1c44c4a43e
commit 94c6a2a850
26 changed files with 3648 additions and 1043 deletions
--- a/src/test/consensus/Consensus_test.cpp
+++ b/src/test/consensus/Consensus_test.cpp
@@ -512,8 +512,7 @@ public:
                                peerJumps.closeJumps.front();
                            // Jump is to a different chain
                            BEAST_EXPECT(jump.from.seq() <= jump.to.seq());
-                            BEAST_EXPECT(
-                                !sim.oracle.isAncestor(jump.from, jump.to));
+                            BEAST_EXPECT(!jump.to.isAncestor(jump.from));
                        }
                    }
                    // fully validated jump forward in same chain
@@ -525,8 +524,7 @@ public:
                                peerJumps.fullyValidatedJumps.front();
                            // Jump is to a different chain with same seq
                            BEAST_EXPECT(jump.from.seq() < jump.to.seq());
-                            BEAST_EXPECT(
-                                sim.oracle.isAncestor(jump.from, jump.to));
+                            BEAST_EXPECT(jump.to.isAncestor(jump.from));
                        }
                    }
                }
@@ -825,6 +823,168 @@ public:
        BEAST_EXPECT(sim.synchronized());
    }

+
+    // Helper collector for testPreferredByBranch
+    // Invasively disconnects network at bad times to cause splits
+    struct Disruptor  // event collector: its on() hooks cut and restore links
+    {
+        csf::PeerGroup& network;      // group the C peers are disconnected from
+        csf::PeerGroup& groupCfast;   // element [0] is the peer that triggers the split
+        csf::PeerGroup& groupCsplit;  // disconnected at the split, reconnected later
+        csf::SimDuration delay;       // link delay passed to connect() on reconnect
+        bool reconnected = false;     // set once so the reconnect happens only once
+
+        Disruptor(
+            csf::PeerGroup& net,
+            csf::PeerGroup& c,
+            csf::PeerGroup& split,
+            csf::SimDuration d)
+            : network(net), groupCfast(c), groupCsplit(split), delay(d)
+        {
+        }
+
+        template <class E>  // catch-all: event types with no overload below are ignored
+        void
+        on(csf::PeerID, csf::SimTime, E const&)
+        {
+        }
+
+        // Split trigger: fires when groupCfast[0] fully validates a seq 2 ledger
+        void
+        on(csf::PeerID who, csf::SimTime, csf::FullyValidateLedger const& e)
+        {
+            using namespace std::chrono;  // NOTE(review): no chrono name is used in this handler
+            // As soon as the fastC node fully validates C, disconnect
+            // ALL C nodes from the network. The fast C node needs to disconnect
+            // as well to prevent it from relaying the validations it did see
+            if (who == groupCfast[0]->id &&
+                e.ledger.seq() == csf::Ledger::Seq{2})
+            {
+                network.disconnect(groupCsplit);
+                network.disconnect(groupCfast);
+            }
+        }
+
+        void
+        on(csf::PeerID who, csf::SimTime, csf::AcceptLedger const& e)  // `who` is not read here
+        {
+            // The first time any peer accepts a seq 3 ledger (a child of B or C),
+            // reconnect groupCsplit only; groupCfast is left disconnected
+            if (!reconnected && e.ledger.seq() == csf::Ledger::Seq{3})
+            {
+                reconnected = true;
+                network.connect(groupCsplit, delay);
+            }
+        }
+
+
+    };
+
+    void
+    testPreferredByBranch()
+    {
+        using namespace csf;
+        using namespace std::chrono;
+
+        // Simulate network splits that are prevented from forking when using
+        // preferred ledger by trie.  This is a contrived example that involves
+        // excessive network splits, but demonstrates the safety improvement
+        // from the preferred ledger by trie approach.
+
+        // Consider 10 validating nodes that comprise a single common UNL
+        // Ledger history:
+        // 1:           A
+        //            _/ \_
+        // 2:         B    C
+        //          _/  _/  \_
+        // 3:       D   C'  |||||||| (8 different ledgers)
+
+        // - All nodes generate the common ledger A
+        // - 2 nodes generate B and 8 nodes generate C
+        // - Only 1 of the C nodes sees all the C validations and fully
+        //   validates C. The rest of the C nodes split at just the right time
+        //   such that they never see any C validations but their own.
+        // - The C nodes continue and generate 8 different child ledgers.
+        // - Meanwhile, the D nodes only saw 1 validation for C and 2 validations
+        //   for B.
+        // - The network reconnects and the validations for generation 3 ledgers
+        //   are observed (D and the 8 C's)
+        // - In the old approach, 2 votes for D outweighs 1 vote for each C'
+        //   so the network would avalanche towards D and fully validate it
+        //   EVEN though C was fully validated by one node
+        // - In the new approach, 2 votes for D are not enough to outweigh the
+        //   8 implicit votes for C, so nodes will avalanche to C instead
+
+
+        ConsensusParms const parms{};
+        Sim sim;
+
+        // Goes A->B->D
+        PeerGroup groupABD = sim.createGroup(2);
+        // Single node that initially fully validates C before the split
+        PeerGroup groupCfast = sim.createGroup(1);
+        // Generates C, but fails to fully validate before the split
+        PeerGroup groupCsplit = sim.createGroup(7);
+
+        PeerGroup groupNotFastC = groupABD + groupCsplit;
+        PeerGroup network = groupABD + groupCsplit + groupCfast;
+
+        SimDuration delay = round<milliseconds>(0.2 * parms.ledgerGRANULARITY);
+        SimDuration fDelay = round<milliseconds>(0.1 * parms.ledgerGRANULARITY);
+
+        network.trust(network);
+        // The fast C node must have a shorter delay (fDelay) to see all the
+        // validations before the other nodes
+        network.connect(groupCfast, fDelay);
+        // The rest of the network is connected at the same speed
+        groupNotFastC.connect(groupNotFastC, delay);
+
+        Disruptor dc(network, groupCfast, groupCsplit, delay);
+        sim.collectors.add(dc);
+
+        // Consensus round to generate ledger A
+        sim.run(1);
+        BEAST_EXPECT(sim.synchronized());
+
+        // Next round generates B and C
+        // To force B, we inject an extra transaction into those nodes
+        for(Peer * peer : groupABD)
+        {
+            peer->txInjections.emplace(
+                    peer->lastClosedLedger.seq(), Tx{42});
+        }
+        // The Disruptor will ensure that nodes disconnect before the C
+        // validations make it to all but the fastC node
+        sim.run(1);
+
+        // We are no longer in sync, but have not yet forked:
+        // 9 nodes consider A the last fully validated ledger and fastC sees C
+        BEAST_EXPECT(!sim.synchronized());
+        BEAST_EXPECT(sim.branches() == 1);
+
+        //  Run another round to generate the 8 different C' ledgers; every
+        for (Peer * p : network)  // peer submits a Tx built from its own id
+            p->submit(Tx(static_cast<std::uint32_t>(p->id)));
+        sim.run(1);
+
+        // Still not forked
+        BEAST_EXPECT(!sim.synchronized());
+        BEAST_EXPECT(sim.branches() == 1);
+
+        // Disruptor will reconnect all but the fastC node
+        sim.run(1);
+
+        if(BEAST_EXPECT(sim.branches() == 1))
+        {
+            BEAST_EXPECT(sim.synchronized());
+        }
+        else // old approach caused a fork; check the non-fastC peers agree
+        {
+            BEAST_EXPECT(sim.branches(groupNotFastC) == 1);
+            BEAST_EXPECT(sim.synchronized(groupNotFastC) == 1);  // NOTE(review): "== 1" looks redundant if synchronized() returns bool - confirm
+        }
+    }
+
    void
    run() override
    {
@@ -839,6 +999,7 @@ public:
        testConsensusCloseTimeRounding();
        testFork();
        testHubNetwork();
+        testPreferredByBranch();
    }
 };