Improve transaction relay logic (#4985)

Combines four related changes:
1. "Decrease `shouldRelay` limit to 30s." Pretty self-explanatory. Currently, the limit is 5 minutes, by which point the `HashRouter` entry could have expired, making this transaction look brand new (and thus causing it to be relayed back to peers which have sent it to us recently).
2.  "Give a transaction more chances to be retried." Will put a transaction into `LedgerMaster`'s held transactions if the transaction gets a `ter`, `tel`, or `tef` result. Old behavior was just `ter`.
     * Additionally, to prevent a transaction from being repeatedly held indefinitely, it must meet some extra conditions. (Documented in a comment in the code.)
3. "Pop all transactions with sequential sequences, or tickets." When a transaction is processed successfully, currently, one held transaction for the same account (if any) will be popped out of the held transactions list, and queued up for the next transaction batch. This change pops all transactions for the account, but only if they have sequential sequences (for non-ticket transactions) or use a ticket. This issue was identified from interactions with @mtrippled's #4504, which was merged, but unfortunately reverted later by #4852. When the batches were spaced out, it could potentially take a very long time for a large number of held transactions for an account to get processed through. However, whether batched or not, this change will help get held transactions cleared out, particularly if a missing earlier transaction is what held them up.
4. "Process held transactions through existing NetworkOPs batching." In the current processing, at the end of each consensus round, all held transactions are directly applied to the open ledger, then the held list is reset. This bypasses all of the logic in `NetworkOPs::apply` which, among other things, broadcasts successful transactions to peers. This means that the transaction may not get broadcast to peers for a really long time (5 minutes in the current implementation, or 30 seconds with this first commit). If the node is a bottleneck (either due to network configuration, or because the transaction was submitted locally), the transaction may not be seen by any other nodes or validators before it expires or causes other problems.
This commit is contained in:
Ed Hennis
2025-05-01 13:58:18 -04:00
committed by GitHub
parent 3502df2174
commit 4a084ce34c
12 changed files with 440 additions and 73 deletions

View File

@@ -18,6 +18,7 @@
//==============================================================================
#include <xrpld/app/misc/HashRouter.h>
#include <xrpld/core/Config.h>
#include <xrpl/basics/chrono.h>
#include <xrpl/beast/unit_test.h>
@@ -27,12 +28,22 @@ namespace test {
class HashRouter_test : public beast::unit_test::suite
{
HashRouter::Setup
getSetup(std::chrono::seconds hold, std::chrono::seconds relay)
{
HashRouter::Setup setup;
setup.holdTime = hold;
setup.relayTime = relay;
return setup;
}
void
testNonExpiration()
{
testcase("Non-expiration");
using namespace std::chrono_literals;
TestStopwatch stopwatch;
HashRouter router(stopwatch, 2s);
HashRouter router(getSetup(2s, 1s), stopwatch);
uint256 const key1(1);
uint256 const key2(2);
@@ -67,9 +78,10 @@ class HashRouter_test : public beast::unit_test::suite
void
testExpiration()
{
testcase("Expiration");
using namespace std::chrono_literals;
TestStopwatch stopwatch;
HashRouter router(stopwatch, 2s);
HashRouter router(getSetup(2s, 1s), stopwatch);
uint256 const key1(1);
uint256 const key2(2);
@@ -144,10 +156,11 @@ class HashRouter_test : public beast::unit_test::suite
void
testSuppression()
{
testcase("Suppression");
// Normal HashRouter
using namespace std::chrono_literals;
TestStopwatch stopwatch;
HashRouter router(stopwatch, 2s);
HashRouter router(getSetup(2s, 1s), stopwatch);
uint256 const key1(1);
uint256 const key2(2);
@@ -173,9 +186,10 @@ class HashRouter_test : public beast::unit_test::suite
void
testSetFlags()
{
testcase("Set Flags");
using namespace std::chrono_literals;
TestStopwatch stopwatch;
HashRouter router(stopwatch, 2s);
HashRouter router(getSetup(2s, 1s), stopwatch);
uint256 const key1(1);
BEAST_EXPECT(router.setFlags(key1, 10));
@@ -186,9 +200,10 @@ class HashRouter_test : public beast::unit_test::suite
void
testRelay()
{
testcase("Relay");
using namespace std::chrono_literals;
TestStopwatch stopwatch;
HashRouter router(stopwatch, 1s);
HashRouter router(getSetup(50s, 1s), stopwatch);
uint256 const key1(1);
@@ -229,9 +244,10 @@ class HashRouter_test : public beast::unit_test::suite
void
testProcess()
{
testcase("Process");
using namespace std::chrono_literals;
TestStopwatch stopwatch;
HashRouter router(stopwatch, 5s);
HashRouter router(getSetup(5s, 1s), stopwatch);
uint256 const key(1);
HashRouter::PeerShortID peer = 1;
int flags;
@@ -243,6 +259,111 @@ class HashRouter_test : public beast::unit_test::suite
BEAST_EXPECT(router.shouldProcess(key, peer, flags, 1s));
}
void
testSetup()
{
testcase("setup_HashRouter");
using namespace std::chrono_literals;
{
Config cfg;
// default
auto const setup = setup_HashRouter(cfg);
BEAST_EXPECT(setup.holdTime == 300s);
BEAST_EXPECT(setup.relayTime == 30s);
}
{
Config cfg;
// non-default
auto& h = cfg.section("hashrouter");
h.set("hold_time", "600");
h.set("relay_time", "15");
auto const setup = setup_HashRouter(cfg);
BEAST_EXPECT(setup.holdTime == 600s);
BEAST_EXPECT(setup.relayTime == 15s);
}
{
Config cfg;
// equal
auto& h = cfg.section("hashrouter");
h.set("hold_time", "400");
h.set("relay_time", "400");
auto const setup = setup_HashRouter(cfg);
BEAST_EXPECT(setup.holdTime == 400s);
BEAST_EXPECT(setup.relayTime == 400s);
}
{
Config cfg;
// wrong order
auto& h = cfg.section("hashrouter");
h.set("hold_time", "60");
h.set("relay_time", "120");
try
{
setup_HashRouter(cfg);
fail();
}
catch (std::exception const& e)
{
std::string expected =
"HashRouter relay time must be less than or equal to hold "
"time";
BEAST_EXPECT(e.what() == expected);
}
}
{
Config cfg;
// too small hold
auto& h = cfg.section("hashrouter");
h.set("hold_time", "10");
h.set("relay_time", "120");
try
{
setup_HashRouter(cfg);
fail();
}
catch (std::exception const& e)
{
std::string expected =
"HashRouter hold time must be at least 12 seconds (the "
"approximate validation time for three "
"ledgers).";
BEAST_EXPECT(e.what() == expected);
}
}
{
Config cfg;
// too small relay
auto& h = cfg.section("hashrouter");
h.set("hold_time", "500");
h.set("relay_time", "6");
try
{
setup_HashRouter(cfg);
fail();
}
catch (std::exception const& e)
{
std::string expected =
"HashRouter relay time must be at least 8 seconds (the "
"approximate validation time for two ledgers).";
BEAST_EXPECT(e.what() == expected);
}
}
{
Config cfg;
// garbage
auto& h = cfg.section("hashrouter");
h.set("hold_time", "alice");
h.set("relay_time", "bob");
auto const setup = setup_HashRouter(cfg);
// The set function ignores values that don't covert, so the
// defaults are left unchanged
BEAST_EXPECT(setup.holdTime == 300s);
BEAST_EXPECT(setup.relayTime == 30s);
}
}
public:
void
run() override
@@ -253,6 +374,7 @@ public:
testSetFlags();
testRelay();
testProcess();
testSetup();
}
};