Compare commits

...

15 Commits

Author SHA1 Message Date
Bart
b55e344205 Defensive checks 2026-03-21 20:06:55 -04:00
Bart
8a896f318a Add tests, add graceful stopping 2026-03-21 18:57:03 -04:00
Bart
0e26aadfe6 Make pending writes size_t to avoid casting everywhere 2026-03-21 10:24:24 -04:00
Bart
d029bcf2d0 Threadpool join in destructor, exception handling 2026-03-21 10:21:24 -04:00
Bart
403cab41e9 Fix thread parallelization calculation 2026-03-20 18:03:17 -04:00
Bart
59e6fbfe12 Merge branch 'develop' into bthomee/iops 2026-03-20 16:04:00 -04:00
Ayaz Salikhov
cf2eb149ee fix: Update .git-blame-ignore-revs (#6577) 2026-03-19 22:48:20 +00:00
Bart
d63f80f73c Merge branch 'develop' into bthomee/iops 2026-03-19 17:28:32 -04:00
Ayaz Salikhov
762922a07f chore: Don't allow files more than 400kb to be added to the repo (#6597) 2026-03-19 21:20:56 +00:00
Bart
dc5eb0ea50 Make class variables private 2026-03-19 08:36:24 -04:00
Bart
609024f15c Merge branch 'develop' into bthomee/iops 2026-03-19 06:32:59 -04:00
Bart
1bf5b0aa10 Add braces 2026-03-18 14:52:06 -04:00
Bart
f783a15bc8 Review feedback 2026-03-18 14:20:06 -04:00
Bart
5a94948a04 Merge branch 'develop' into bthomee/iops 2026-03-18 14:03:28 -04:00
Bart
f586382622 perf: Improve IOPS when reading from and writing to NuDB and RocksDB 2026-03-17 17:56:59 -04:00
11 changed files with 929 additions and 77 deletions

View File

@@ -1,16 +1,79 @@
# This feature requires Git >= 2.24
# To use it by default in git blame:
# git config blame.ignoreRevsFile .git-blame-ignore-revs
50760c693510894ca368e90369b0cc2dabfd07f3
e2384885f5f630c8f0ffe4bf21a169b433a16858
241b9ddde9e11beb7480600fd5ed90e1ef109b21
760f16f56835663d9286bd29294d074de26a7ba6
0eebe6a5f4246fced516d52b83ec4e7f47373edd
2189cc950c0cebb89e4e2fa3b2d8817205bf7cef
b9d007813378ad0ff45660dc07285b823c7e9855
fe9a5365b8a52d4acc42eb27369247e6f238a4f9
9a93577314e6a8d4b4a8368cc9d2b15a5d8303e8
552377c76f55b403a1c876df873a23d780fcc81c
97f0747e103f13e26e45b731731059b32f7679ac
b13370ac0d207217354f1fc1c29aef87769fb8a1
# This file is sorted in reverse chronological order, with the most recent commits at the top.
# The commits listed here are ignored by git blame, which is useful for formatting-only commits that would otherwise obscure the history of changes to a file.
# refactor: Enable remaining clang-tidy `cppcoreguidelines` checks (#6538)
72f4cb097f626b08b02fc3efcb4aa11cb2e7adb8
# refactor: Rename system name from 'ripple' to 'xrpld' (#6347)
ffea3977f0b771fe8e43a8f74e4d393d63a7afd8
# refactor: Update transaction folder structure (#6483)
5865bd017f777491b4a956f9210be0c4161f5442
# chore: Use gersemi instead of ancient cmake-format (#6486)
0c74270b055133a57a497b5c9fc5a75f7647b1f4
# chore: Apply clang-format width 100 (#6387)
2c1fad102353e11293e3edde1c043224e7d3e983
# chore: Set clang-format width to 100 in config file (#6387)
25cca465538a56cce501477f9e5e2c1c7ea2d84c
# chore: Set cmake-format width to 100 (#6386)
469ce9f291a4480c38d4ee3baca5136b2f053cd0
# refactor: Modularize app/tx (#6228)
0976b2b68b64972af8e6e7c497900b5bce9fe22f
# chore: Update clang-format to 21.1.8 (#6352)
958d8f375453d80bb1aa4c293b5102c045a3e4b4
# refactor: Replace include guards by '#pragma once' (#6322)
34ef577604782ca8d6e1c17df8bd7470990a52ff
# chore: Format all cmake files without comments (#6294)
fe9c8d568fcf6ac21483024e01f58962dd5c8260
# chore: Add cmake-format pre-commit hook (#6279)
a0e09187b9370805d027c611a7e9ff5a0125282a
# chore: Set ColumnLimit to 120 in clang-format (#6288)
5f638f55536def0d88b970d1018a465a238e55f4
# refactor: Fix typos in comments, configure cspell (#6164)
3c9f5b62525cb1d6ca1153eeb10433db7d7379fd
# refactor: Rename `rippled.cfg` to `xrpld.cfg` (#6098)
3d1b3a49b3601a0a7037fa0b19d5df7b5e0e2fc1
# refactor: Rename `ripple` namespace to `xrpl` (#5982)
1eb0fdac6543706b4b9ddca57fd4102928a1f871
# refactor: Rename `rippled` binary to `xrpld` (#5983)
9eb84a561ef8bb066d89f098bd9b4ac71baed67c
# refactor: Replaces secp256k1 source by Conan package (#6089)
813bc4d9491b078bb950f8255f93b02f71320478
# refactor: Remove unnecessary copyright notices already covered by LICENSE.md (#5929)
1d42c4f6de6bf01d1286fc7459b17a37a5189e88
# refactor: Rename `RIPPLE_` and `RIPPLED_` definitions to `XRPL_` (#5821)
ada83564d894829424b0f4d922b0e737e07abbf7
# refactor: Modularize shamap and nodestore (#5668)
8eb233c2ea8ad5a159be73b77f0f5e1496d547ac
# refactor: Modularise ledger (#5493)
dc8b37a52448b005153c13a7f046ad494128cf94
# chore: Update clang-format and prettier with pre-commit (#5709)
c14ce956adeabe476ad73c18d73103f347c9c613
# chore: Fix file formatting (#5718)
896b8c3b54a22b0497cb0d1ce95e1095f9a227ce
# chore: Reverts formatting changes to external files, adds formatting changes to proto files (#5711)
b13370ac0d207217354f1fc1c29aef87769fb8a1
# chore: Run prettier on all files (#5657)
97f0747e103f13e26e45b731731059b32f7679ac
# Reformat code with clang-format-18
552377c76f55b403a1c876df873a23d780fcc81c
# Recompute loops (#4997)
d028005aa6319338b0adae1aebf8abe113162960
# Rewrite includes (#4997)
1d23148e6dd53957fcb6205c07a5c6cd7b64d50c
# Rearrange sources (#4997)
e416ee72ca26fa0c09d2aee1b68bdfb2b7046eed
# Move CMake directory (#4997)
2e902dee53aab2a8f27f32971047bb81e022f94f
# Rewrite includes
0eebe6a5f4246fced516d52b83ec4e7f47373edd
# Format formerly .hpp files
760f16f56835663d9286bd29294d074de26a7ba6
# Rename .hpp to .h
241b9ddde9e11beb7480600fd5ed90e1ef109b21
# Consolidate external libraries
e2384885f5f630c8f0ffe4bf21a169b433a16858
# Format first-party source according to .clang-format
50760c693510894ca368e90369b0cc2dabfd07f3

View File

@@ -177,6 +177,7 @@ test.unit_test > xrpl.protocol
tests.libxrpl > xrpl.basics
tests.libxrpl > xrpl.json
tests.libxrpl > xrpl.net
tests.libxrpl > xrpl.nodestore
tests.libxrpl > xrpl.protocol
tests.libxrpl > xrpl.protocol_autogen
xrpl.conditions > xrpl.basics

View File

@@ -13,12 +13,11 @@ repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
args: [--maxkb=400, --enforce-all]
- id: trailing-whitespace
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
- id: end-of-file-fixer
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
- id: mixed-line-ending
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
- id: check-merge-conflict
args: [--assume-in-merge]

View File

@@ -138,6 +138,22 @@ public:
/** Returns the number of file descriptors the backend expects to need. */
virtual int
fdRequired() const = 0;
/** The number of hardware threads to use for compression of a batch.
    Derived from std::thread::hardware_concurrency(), clamped to the range [1, 8]:
    at least 1 (hardware_concurrency() may report 0 on some platforms) and at
    most 8 to limit contention. */
static unsigned int const numHardwareThreads;
/** Calculate parallelization parameters for a batch of items.
Determines the number of threads and items per thread needed for parallel batch processing.
Aims for at least 4 items per thread, so small batches run on a single thread.
@param batchSize Number of items to process. A batch size of 0 yields (0, 0).
@param maxThreadCount Maximum number of threads to use. Must be greater than zero;
       a value of 0 is treated as an internal error and falls back to (1, batchSize).
@return A pair of (numThreads, numItems) where numThreads is the exact number of threads to
use, and numItems is the number of items per thread. The last thread may process
fewer items.
*/
static std::pair<unsigned int, unsigned int>
calculateBatchParallelism(unsigned int batchSize, unsigned int maxThreadCount);
};
} // namespace NodeStore

View File

@@ -0,0 +1,68 @@
#include <xrpl/nodestore/Backend.h>
#include <algorithm>
#include <thread>
namespace xrpl {
namespace NodeStore {
// One-time initialization of the static hardware thread count.
// std::thread::hardware_concurrency() may report 0 when the value cannot be
// determined, so the result is clamped to a minimum of 1; it is also capped at
// 8 threads to keep contention under control.
unsigned int const Backend::numHardwareThreads = []() {
    auto const detected = std::thread::hardware_concurrency();
    return std::clamp(detected, 1u, 8u);
}();
/*  Compute (numThreads, numItems) for splitting a batch across worker threads.

    Preconditions: maxThreadCount > 0 (asserted; a zero value falls back to a
    single-threaded result as a defensive measure).

    A batch size of 0 returns (0, 0). Otherwise the thread count is estimated
    so that each thread handles at least 4 items, capped at maxThreadCount,
    and then reduced to the exact number of threads actually required after
    rounding the per-thread item count up. The last thread may process fewer
    than numItems items.
*/
std::pair<unsigned int, unsigned int>
Backend::calculateBatchParallelism(unsigned int batchSize, unsigned int maxThreadCount)
{
    XRPL_ASSERT(
        maxThreadCount > 0,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : maxThreadCount > 0");
    if (maxThreadCount == 0)
    {
        // LCOV_EXCL_START
        UNREACHABLE("xrpl::NodeStore::Backend::calculateBatchParallelism : maxThreadCount == 0");
        return {1, batchSize};
        // LCOV_EXCL_STOP
    }

    if (batchSize == 0)
    {
        return {0, 0};
    }

    // Estimate the number of threads using ceiling division: aim for at least 4 items per thread,
    // but don't exceed the number of available threads.
    auto const initialThreads = std::min((batchSize + 3u) / 4u, maxThreadCount);

    // Calculate number of items per thread (ceiling division).
    auto const numItems = (batchSize + initialThreads - 1u) / initialThreads;

    // Calculate the actual number of threads needed. After rounding up numItems, we may need fewer
    // threads than initially estimated.
    auto const actualThreads = (batchSize + numItems - 1u) / numItems;

    XRPL_ASSERT(
        numItems <= batchSize,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : numItems <= batchSize");
    XRPL_ASSERT(
        actualThreads <= batchSize,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : actualThreads <= batchSize");
    // Note: the diagnostic text below previously said "hwThreadCount", which did
    // not match the checked condition; it now names the actual parameter.
    XRPL_ASSERT(
        actualThreads <= maxThreadCount,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : actualThreads <= maxThreadCount");
    if (numItems > batchSize || actualThreads > batchSize || actualThreads > maxThreadCount)
    {
        // LCOV_EXCL_START
        UNREACHABLE("xrpl::NodeStore::Backend::calculateBatchParallelism : sanity check failed");
        return {1, batchSize};
        // LCOV_EXCL_STOP
    }

    return {actualThreads, numItems};
}
} // namespace NodeStore
} // namespace xrpl

View File

@@ -7,15 +7,22 @@
#include <xrpl/nodestore/detail/EncodedBlob.h>
#include <xrpl/nodestore/detail/codec.h>
#include <boost/asio/post.hpp>
#include <boost/asio/thread_pool.hpp>
#include <boost/filesystem.hpp>
#include <nudb/nudb.hpp>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <latch>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
namespace xrpl {
namespace NodeStore {
@@ -23,21 +30,6 @@ namespace NodeStore {
class NuDBBackend : public Backend
{
public:
// "appnum" is an application-defined constant stored in the header of a
// NuDB database. We used it to identify shard databases before that code
// was removed. For now, its only use is a sanity check that the database
// was created by xrpld.
static constexpr std::uint64_t appnum = 1;
beast::Journal const j_;
size_t const keyBytes_;
std::size_t const burstSize_;
std::string const name_;
std::size_t const blockSize_;
nudb::store db_;
std::atomic<bool> deletePath_;
Scheduler& scheduler_;
NuDBBackend(
size_t keyBytes,
Section const& keyValues,
@@ -51,6 +43,7 @@ public:
, blockSize_(parseBlockSize(name_, keyValues, journal))
, deletePath_(false)
, scheduler_(scheduler)
, threadPool_(numHardwareThreads)
{
if (name_.empty())
Throw<std::runtime_error>("nodestore: Missing path in NuDB backend");
@@ -71,6 +64,7 @@ public:
, db_(context)
, deletePath_(false)
, scheduler_(scheduler)
, threadPool_(numHardwareThreads)
{
if (name_.empty())
Throw<std::runtime_error>("nodestore: Missing path in NuDB backend");
@@ -80,7 +74,32 @@ public:
{
try
{
// close can throw and we don't want the destructor to throw.
// Set shutdown flag to prevent new batch operations from starting. This must happen
// before stop() is called to ensure fetchBatch/storeBatch check the flag before posting
// any new tasks.
shutdown_.store(true, std::memory_order_release);
// Wait for all active operations to complete.
while (pendingReads_.load(std::memory_order_acquire) > 0 ||
pendingWrites_.load(std::memory_order_acquire) > 0)
{
std::this_thread::yield();
}
// Signal the thread pool to stop accepting new work. This ensures no new tasks will be
// posted after this point.
threadPool_.stop();
// Wait for all currently executing thread pool tasks to complete. This prevents worker
// threads from accessing the database after close().
threadPool_.join();
// Verify all writes have completed.
XRPL_ASSERT(
pendingWrites_.load() == 0, "xrpl::NuDBBackend::~NuDBBackend : pendingWrites == 0");
// Close the database. At this point, all threads have stopped and no pending reads and
// writes remain, so it's safe to close the database.
close();
}
catch (nudb::system_error const&) // NOLINT(bugprone-empty-catch)
@@ -109,9 +128,7 @@ public:
if (db_.is_open())
{
// LCOV_EXCL_START
UNREACHABLE(
"xrpl::NodeStore::NuDBBackend::open : database is already "
"open");
UNREACHABLE("xrpl::NodeStore::NuDBBackend::open : database is already open");
JLOG(j_.error()) << "database is already open";
return;
// LCOV_EXCL_STOP
@@ -127,16 +144,24 @@ public:
nudb::create<nudb::xxhasher>(
dp, kp, lp, appType, uid, salt, keyBytes_, blockSize_, 0.50, ec);
if (ec == nudb::errc::file_exists)
{
ec = {};
}
if (ec)
{
Throw<nudb::system_error>(ec);
}
}
db_.open(dp, kp, lp, ec);
if (ec)
{
Throw<nudb::system_error>(ec);
}
if (db_.appnum() != appnum)
{
Throw<std::runtime_error>("nodestore: unknown appnum");
}
db_.set_burst(burstSize_);
}
@@ -181,9 +206,22 @@ public:
Status
fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pno) override
{
// Increment pending reads counter on entry, decrement on exit. This ensures the destructor
// waits for this operation to complete.
++pendingReads_;
auto guard = [this](void*) { --pendingReads_; };
std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
// Check if we're shutting down. If so, return immediately instead of doing any work.
if (shutdown_.load(std::memory_order_acquire))
{
return backendError;
}
Status status = ok;
pno->reset();
nudb::error_code ec;
db_.fetch(
hash.data(),
[&hash, pno, &status](void const* data, std::size_t size) {
@@ -199,30 +237,119 @@ public:
status = ok;
},
ec);
if (ec == nudb::error::key_not_found)
{
return notFound;
}
if (ec)
{
Throw<nudb::system_error>(ec);
}
return status;
}
std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
fetchBatch(std::vector<uint256> const& hashes) override
{
std::vector<std::shared_ptr<NodeObject>> results;
results.reserve(hashes.size());
for (auto const& h : hashes)
if (hashes.empty())
{
std::shared_ptr<NodeObject> nObj;
Status status = fetch(h, &nObj);
if (status != ok)
return {{}, ok};
}
// Increment pending reads counter on entry, decrement on exit. This ensures the destructor
// waits for this operation to complete.
pendingReads_ += hashes.size();
auto guard = [this, &hashes](void*) { pendingReads_ -= hashes.size(); };
std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
// Check if we're shutting down. If so, return immediately instead of doing any work.
if (shutdown_.load(std::memory_order_acquire))
{
return {{}, backendError};
}
std::vector<std::shared_ptr<NodeObject>> results(hashes.size());
// Calculate parallelization parameters for the batch.
auto const [numThreads, numItems] =
Backend::calculateBatchParallelism(hashes.size(), numHardwareThreads);
// If we need only one thread, just do it sequentially. Although it should be impossible to
// get 0 threads here, handle it gracefully just in case.
if (numThreads <= 1u)
{
for (size_t i = 0; i < hashes.size(); ++i)
{
results.push_back({});
std::shared_ptr<NodeObject> nObj;
if (fetch(hashes[i], &nObj) == ok)
{
results[i] = nObj;
}
}
else
return {results, ok};
}
// Use a latch to synchronize task completion.
std::latch taskCompletion(numThreads);
// Track exceptions from worker threads.
std::exception_ptr eptr;
std::mutex emutex;
// Submit fetch tasks to the thread pool.
for (auto t = 0u; t < numThreads; ++t)
{
auto const startIdx = t * numItems;
XRPL_ASSERT(
startIdx < hashes.size(),
"xrpl::NuDBFactory::fetchBatch : startIdx < hashes.size()");
if (startIdx >= hashes.size())
{
results.push_back(nObj);
// This should never happen, but is kept as a safety check.
taskCompletion.count_down();
continue;
}
auto const endIdx = std::min<std::size_t>(startIdx + numItems, hashes.size());
auto task =
[this, &hashes, &results, &taskCompletion, &eptr, &emutex, startIdx, endIdx]() {
try
{
// Fetch the items assigned to this task.
for (size_t i = startIdx; i < endIdx; ++i)
{
std::shared_ptr<NodeObject> nObj;
if (fetch(hashes[i], &nObj) == ok)
{
results[i] = nObj;
}
}
}
catch (...)
{
// Store the first exception that occurs. Ensures count_down() is always
// called to prevent deadlock.
std::lock_guard<std::mutex> lock(emutex);
if (!eptr)
{
eptr = std::current_exception();
}
}
// Signal task completion.
taskCompletion.count_down();
};
boost::asio::post(threadPool_, std::move(task));
}
// Wait for all fetch tasks to complete.
taskCompletion.wait();
// Rethrow the first exception if one occurred.
if (eptr)
{
std::rethrow_exception(eptr);
}
return {results, ok};
@@ -232,21 +359,39 @@ public:
do_insert(std::shared_ptr<NodeObject> const& no)
{
EncodedBlob e(no);
nudb::error_code ec;
nudb::detail::buffer bf;
auto const result = nodeobject_compress(e.getData(), e.getSize(), bf);
nudb::error_code ec;
db_.insert(e.getKey(), result.first, result.second, ec);
if (ec && ec != nudb::error::key_exists)
{
Throw<nudb::system_error>(ec);
}
}
void
store(std::shared_ptr<NodeObject> const& no) override
{
// Increment pending writes counter on entry, decrement on exit. This ensures the destructor
// waits for this operation to complete.
++pendingWrites_;
auto guard = [this](void*) { --pendingWrites_; };
std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
// Check if we're shutting down. If so, return immediately instead of doing any work.
if (shutdown_.load(std::memory_order_acquire))
{
return;
}
BatchWriteReport report{};
report.writeCount = 1;
auto const start = std::chrono::steady_clock::now();
do_insert(no);
report.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - start);
scheduler_.onBatchWrite(report);
@@ -255,11 +400,127 @@ public:
void
storeBatch(Batch const& batch) override
{
if (batch.empty())
{
return;
}
// Increment pending writes counter on entry, decrement on exit. This ensures the destructor
// waits for this operation to complete.
pendingWrites_ += batch.size();
auto guard = [this, &batch](void*) { pendingWrites_ -= batch.size(); };
std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
// Check if we're shutting down. If so, return immediately instead of doing any work.
if (shutdown_.load(std::memory_order_acquire))
{
return;
}
BatchWriteReport report{};
report.writeCount = batch.size();
auto const start = std::chrono::steady_clock::now();
for (auto const& e : batch)
do_insert(e);
// Calculate parallelization parameters for the batch.
auto const [numThreads, numItems] =
Backend::calculateBatchParallelism(batch.size(), numHardwareThreads);
// If we need only one thread, just do it sequentially. Although it should be impossible to
// get 0 threads here, handle it gracefully just in case.
if (numThreads <= 1u)
{
for (auto const& e : batch)
{
do_insert(e);
}
report.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - start);
scheduler_.onBatchWrite(report);
return;
}
// Helper struct that stores actual item data, not pointers, to avoid dangling references
// after EncodedBlob and buffer go out of scope in the thread.
struct CompressedData
{
std::vector<std::uint8_t> key;
std::vector<std::uint8_t> data;
std::exception_ptr eptr;
};
std::vector<CompressedData> compressed(batch.size());
// Use a latch to synchronize task completion.
std::latch taskCompletion(numThreads);
// Submit compression tasks to the thread pool.
for (auto t = 0u; t < numThreads; ++t)
{
auto const startIdx = t * numItems;
XRPL_ASSERT(
startIdx < batch.size(), "xrpl::NuDBFactory::storeBatch : startIdx < batch.size()");
if (startIdx >= batch.size())
{
// This should never happen, but is kept as a safety check.
taskCompletion.count_down();
continue;
}
auto const endIdx = std::min<std::size_t>(startIdx + numItems, batch.size());
auto task =
[&batch, &compressed, &taskCompletion, startIdx, endIdx, keyBytes = keyBytes_]() {
// Compress the items assigned to this task.
for (size_t i = startIdx; i < endIdx; ++i)
{
auto& item = compressed[i];
try
{
EncodedBlob e(batch[i]);
// Copy the key data to avoid dangling pointer.
auto const* keyPtr = static_cast<std::uint8_t const*>(e.getKey());
item.key.assign(keyPtr, keyPtr + keyBytes);
// Compress and copy the data to avoid dangling pointer.
nudb::detail::buffer bf;
auto const comp = nodeobject_compress(e.getData(), e.getSize(), bf);
auto const* dataPtr = static_cast<std::uint8_t const*>(comp.first);
item.data.assign(dataPtr, dataPtr + comp.second);
}
catch (...)
{
// Store the exception so it can be rethrown in the sequential phase
// below.
item.eptr = std::current_exception();
}
}
// Signal task completion.
taskCompletion.count_down();
};
boost::asio::post(threadPool_, std::move(task));
}
// Wait for all compression tasks to complete.
taskCompletion.wait();
// Insert the compressed data sequentially, since NuDB is designed as an append-only data
// store that limits concurrent writes.
for (auto const& item : compressed)
{
if (item.eptr)
{
std::rethrow_exception(item.eptr);
}
nudb::error_code ec;
db_.insert(item.key.data(), item.data.data(), item.data.size(), ec);
if (ec && ec != nudb::error::key_exists)
{
Throw<nudb::system_error>(ec);
}
}
report.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - start);
scheduler_.onBatchWrite(report);
@@ -276,7 +537,7 @@ public:
auto const dp = db_.dat_path();
auto const kp = db_.key_path();
auto const lp = db_.log_path();
// auto const appnum = db_.appnum();
nudb::error_code ec;
db_.close(ec);
if (ec)
@@ -310,7 +571,7 @@ public:
int
getWriteLoad() override
{
return 0;
return pendingWrites_.load();
}
void
@@ -385,6 +646,28 @@ private:
Throw<std::runtime_error>(s.str());
}
}
// "appnum" is an application-defined constant stored in the header of a
// NuDB database. We used it to identify shard databases before that code
// was removed. For now, its only use is a sanity check that the database
// was created by xrpld.
static constexpr std::uint64_t appnum = 1;
beast::Journal const j_;
size_t const keyBytes_;
std::size_t const burstSize_;
std::string const name_;
std::size_t const blockSize_;
nudb::store db_;
std::atomic<bool> deletePath_;
Scheduler& scheduler_;
std::atomic<size_t> pendingReads_{
0}; // Declare before threadPool_ to ensure it's destroyed after.
std::atomic<size_t> pendingWrites_{
0}; // Declare before threadPool_ to ensure it's destroyed after.
std::atomic<bool> shutdown_{
false}; // Declare before threadPool_ to ensure it's destroyed after.
boost::asio::thread_pool threadPool_; // Declare after db_ to ensure it's destroyed before.
};
//------------------------------------------------------------------------------

View File

@@ -13,6 +13,7 @@
#include <atomic>
#include <memory>
#include <thread>
namespace xrpl {
namespace NodeStore {
@@ -185,6 +186,41 @@ public:
}
}
// Enable pipelined writes for better write concurrency.
m_options.enable_pipelined_write = true;
// Set background job parallelism for better compaction/flush performance to the number of
// hardware threads, unless the value is explicitly provided in the config. The default is
// 2 (see include/rocksdb/options.h in the Conan dependency directory), so don't use fewer
// than that if no value is explicitly provided.
if (keyValues.exists("max_background_jobs"))
{
m_options.max_background_jobs = get<unsigned int>(keyValues, "max_background_jobs");
}
else if (auto v = numHardwareThreads; v > 2)
{
m_options.max_background_jobs = v;
}
// Set subcompactions for parallel compaction within a job to the number of hardware
// threads, unless the value is explicitly provided in the config. The default is 1 (see
// include/rocksdb/options.h in the Conan dependency directory), so don't use fewer
// than that if no value is explicitly provided.
if (keyValues.exists("max_subcompactions"))
{
m_options.max_subcompactions = get<unsigned int>(keyValues, "max_subcompactions");
}
else if (auto v = numHardwareThreads / 2; v > 1)
{
m_options.max_subcompactions = v;
}
// Enable direct I/O by default unless explicitly disabled in the config. This bypasses the
// OS page cache for better predictable performance on SSDs.
m_options.use_direct_reads = get<bool>(keyValues, "use_direct_io", true);
m_options.use_direct_io_for_flush_and_compaction =
get<bool>(keyValues, "use_direct_io", true);
std::string s1, s2;
rocksdb::GetStringFromDBOptions(&s1, m_options, "; ");
rocksdb::GetStringFromColumnFamilyOptions(&s2, m_options, "; ");
@@ -259,23 +295,19 @@ public:
rocksdb::ReadOptions const options;
rocksdb::Slice const slice(std::bit_cast<char const*>(hash.data()), m_keyBytes);
std::string string;
rocksdb::Status getStatus = m_db->Get(options, slice, &string);
if (getStatus.ok())
{
DecodedBlob decoded(hash.data(), string.data(), string.size());
if (decoded.wasOk())
{
*pObject = decoded.createObject();
}
else
{
// Decoding failed, probably corrupted!
//
// Decoding failed, probably corrupted.
status = dataCorrupt;
}
}
@@ -292,7 +324,6 @@ public:
else
{
status = Status(customCode + unsafe_cast<int>(getStatus.code()));
JLOG(m_journal.error()) << getStatus.ToString();
}
}
@@ -303,19 +334,44 @@ public:
std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
fetchBatch(std::vector<uint256> const& hashes) override
{
std::vector<std::shared_ptr<NodeObject>> results;
results.reserve(hashes.size());
XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::fetchBatch : non-null database");
if (hashes.empty())
{
return {{}, ok};
}
// Use MultiGet for parallel reads to allow RocksDB to fetch multiple keys concurrently,
// significantly improving throughput compared to sequential fetch() calls.
std::vector<rocksdb::Slice> keys;
keys.reserve(hashes.size());
for (auto const& h : hashes)
{
std::shared_ptr<NodeObject> nObj;
Status status = fetch(h, &nObj);
if (status != ok)
keys.emplace_back(std::bit_cast<char const*>(h.data()), m_keyBytes);
}
rocksdb::ReadOptions options;
options.async_io = true; // Enable for better concurrency on supported platforms.
std::vector<std::string> values(hashes.size());
auto statuses = m_db->MultiGet(options, keys, &values);
std::vector<std::shared_ptr<NodeObject>> results(hashes.size());
for (size_t i = 0; i < hashes.size(); ++i)
{
if (statuses[i].ok())
{
results.push_back({});
DecodedBlob decoded(hashes[i].data(), values[i].data(), values[i].size());
if (decoded.wasOk())
{
results[i] = decoded.createObject();
}
}
else
else if (!statuses[i].IsNotFound())
{
results.push_back(nObj);
// Log other errors but continue processing.
JLOG(m_journal.warn()) << "fetchBatch: MultiGet error for key "
<< keys[i].ToString() << ": " << statuses[i].ToString();
}
}
@@ -331,25 +387,45 @@ public:
void
storeBatch(Batch const& batch) override
{
XRPL_ASSERT(
m_db,
"xrpl::NodeStore::RocksDBBackend::storeBatch : non-null "
"database");
rocksdb::WriteBatch wb;
XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::storeBatch : non-null database");
if (batch.empty())
{
return;
}
rocksdb::WriteBatch wb;
for (auto const& e : batch)
{
EncodedBlob encoded(e);
wb.Put(
rocksdb::Slice(std::bit_cast<char const*>(encoded.getKey()), m_keyBytes),
rocksdb::Slice(std::bit_cast<char const*>(encoded.getData()), encoded.getSize()));
}
rocksdb::WriteOptions const options;
// Configure WriteOptions for high throughput.
// Note: no_slowdown is intentionally NOT set here. When set to true, RocksDB returns an
// error instead of stalling when write buffers are full, which could cause write
// failures during high load. We prefer to accept brief stalls over dropped writes.
rocksdb::WriteOptions options;
// Setting `sync = false` improves write throughput significantly by allowing the OS to
// batch fsync operations, rather than forcing immediate disk synchronization on every
// write. The Write-Ahead Log (WAL) is still written and flushed, so database consistency is
// maintained across clean restarts and crashes.
//
// Note: On hard shutdown up to a few seconds of recent writes (since the last OS-initiated
// flush) may be lost from this node. However, since ledger data is replicated across
// the network, lost writes can be re-synced from peers during startup.
options.sync = false;
// Keep WAL enabled for crash recovery consistency.
options.disableWAL = false;
// Ensure RocksDB will not aggressively throttle the writes.
options.low_pri = false;
auto ret = m_db->Write(options, &wb);
if (!ret.ok())
Throw<std::runtime_error>("storeBatch failed: " + ret.ToString());
}

View File

@@ -520,6 +520,13 @@ public:
srcParams.set("type", srcBackendType);
srcParams.set("path", node_db.path());
beast::temp_dir dest_db;
Section destParams;
destParams.set("type", destBackendType);
destParams.set("path", dest_db.path());
testcase("import into '" + destBackendType + "' from '" + srcBackendType + "'");
// Create a batch
auto batch = createPredictableBatch(numObjectsToTest, seedValue);
@@ -538,16 +545,9 @@ public:
Manager::instance().make_Database(megabytes(4), scheduler, 2, srcParams, journal_);
// Set up the destination database
beast::temp_dir dest_db;
Section destParams;
destParams.set("type", destBackendType);
destParams.set("path", dest_db.path());
std::unique_ptr<Database> dest =
Manager::instance().make_Database(megabytes(4), scheduler, 2, destParams, journal_);
testcase("import into '" + destBackendType + "' from '" + srcBackendType + "'");
// Do the import
dest->importDatabase(*src);

View File

@@ -53,3 +53,7 @@ if(NOT WIN32)
target_link_libraries(xrpl.test.net PRIVATE xrpl.imports.test)
add_dependencies(xrpl.tests xrpl.test.net)
endif()
# Build and register the nodestore unit-test binary, following the same
# pattern as the other xrpl.test.* targets in this file.
xrpl_add_test(nodestore)
target_link_libraries(xrpl.test.nodestore PRIVATE xrpl.imports.test)
add_dependencies(xrpl.tests xrpl.test.nodestore)

View File

@@ -0,0 +1,334 @@
#include <xrpl/nodestore/Backend.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <vector>
using namespace xrpl;
using namespace xrpl::NodeStore;
// Test helper: expand the (threads, items-per-thread) result of
// calculateBatchParallelism into the concrete [begin, end) index ranges that
// each worker thread would cover, so tests can assert on range boundaries.
std::vector<std::pair<unsigned int, unsigned int>>
calculateRanges(unsigned int batchSize, unsigned int maxThreadCount)
{
    auto const [threadCount, itemsPerThread] =
        Backend::calculateBatchParallelism(batchSize, maxThreadCount);

    std::vector<std::pair<unsigned int, unsigned int>> ranges;
    ranges.reserve(threadCount);
    for (unsigned int thread = 0; thread < threadCount; ++thread)
    {
        auto const begin = thread * itemsPerThread;
        auto const end = std::min(begin + itemsPerThread, batchSize);
        ranges.emplace_back(begin, end);
    }
    return ranges;
}
// An empty batch is a no-op: there is no work to distribute, so the function
// reports zero threads and zero items, and the derived range list is empty.
TEST(BatchParallelism, EmptyBatch)
{
    // Empty batch should return 0 threads.
    {
        auto const batchSize = 0u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 0u);
        EXPECT_EQ(numItems, 0u);
        // Verify ranges calculation: zero threads must produce zero ranges.
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        EXPECT_EQ(ranges.size(), numThreads);
    }
}
// Batches of 1-4 items stay single-threaded: the heuristic targets at least
// 4 items per thread (ceil(batchSize / 4) threads), so these sizes all round
// to exactly one thread covering the whole batch in a single [0, batchSize)
// range.
TEST(BatchParallelism, SmallBatches)
{
    // Batch size 1 should use 1 thread.
    {
        auto const batchSize = 1u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 1u);

        // The single range must span the entire batch.
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 1u);
    }

    // Batch size 2 should use 1 thread.
    {
        auto const batchSize = 2u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 2u);

        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 2u);
    }

    // Batch size 3 should use 1 thread.
    {
        auto const batchSize = 3u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 3u);

        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 3u);
    }

    // Batch size 4 should use 1 thread (exactly 4 items), the boundary of the
    // at-least-4-items-per-thread heuristic.
    {
        auto const batchSize = 4u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 4u);

        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 4u);
    }
}
TEST(BatchParallelism, MediumBatches)
{
    // Medium-sized batches are split across multiple threads. When the
    // batch does not divide evenly, the final range is shortened so it
    // ends exactly at the batch size.
    struct Case
    {
        unsigned int batchSize;
        unsigned int expectedThreads;
        unsigned int expectedItems;
    };
    constexpr Case cases[] = {
        {5u, 2u, 3u},   // ceil(5/4) = 2 threads, ceil(5/2) = 3 items
        {8u, 2u, 4u},   // splits evenly over 2 threads
        {15u, 4u, 4u},  // ceil(15/4) = 4 threads, short last range
        {22u, 6u, 4u},  // ceil(22/4) = 6 threads, short last range
        {32u, 8u, 4u},  // splits evenly over the maximum thread count
    };
    auto const maxThreadCount = 8u;
    for (auto const& c : cases)
    {
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(c.batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, c.expectedThreads);
        EXPECT_EQ(numItems, c.expectedItems);
        auto const ranges = calculateRanges(c.batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (unsigned int i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            // Each range spans numItems entries, except possibly the
            // last, which is clamped to the batch size.
            EXPECT_EQ(
                ranges[i].second, std::min((i + 1) * numItems, c.batchSize));
        }
    }
}
TEST(BatchParallelism, LargeBatches)
{
    // Large batches are capped at the maximum thread count, with the
    // items divided (rounding up) across those threads.
    struct Case
    {
        unsigned int batchSize;
        unsigned int expectedThreads;
        unsigned int expectedItems;
    };
    constexpr Case cases[] = {
        {100u, 8u, 13u},   // ceil(100/8) = 13 items, short last range
        {1000u, 8u, 125u},  // splits evenly over 8 threads
    };
    auto const maxThreadCount = 8u;
    for (auto const& c : cases)
    {
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(c.batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, c.expectedThreads);
        EXPECT_EQ(numItems, c.expectedItems);
        auto const ranges = calculateRanges(c.batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (unsigned int i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            // Each range spans numItems entries, except possibly the
            // last, which is clamped to the batch size.
            EXPECT_EQ(
                ranges[i].second, std::min((i + 1) * numItems, c.batchSize));
        }
    }
}
TEST(BatchParallelism, HardwareThreadLimits)
{
    // The thread count is bounded both by the available hardware threads
    // and by the batch size itself.
    struct Case
    {
        unsigned int batchSize;
        unsigned int maxThreadCount;
        unsigned int expectedThreads;
        unsigned int expectedItems;
    };
    constexpr Case cases[] = {
        // With only 1 thread available, it takes the whole batch.
        {100u, 1u, 1u, 100u},
        // With 2 threads, the batch is halved.
        {50u, 2u, 2u, 25u},
        // maxThreadCount 12: ceil(50/4) = 13 candidate threads, capped at
        // 12, then recomputed down to 10 threads of 5 items each.
        {50u, 12u, 10u, 5u},
        // With many threads, ceil(20/4) = 5 threads; limited by batch size.
        {20u, 100u, 5u, 4u},
    };
    for (auto const& c : cases)
    {
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(c.batchSize, c.maxThreadCount);
        EXPECT_EQ(numThreads, c.expectedThreads);
        EXPECT_EQ(numItems, c.expectedItems);
        auto const ranges = calculateRanges(c.batchSize, c.maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (unsigned int i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            // All of these cases split evenly, but clamp to the batch
            // size anyway to match the helper's contract.
            EXPECT_EQ(
                ranges[i].second, std::min((i + 1) * numItems, c.batchSize));
        }
    }
}

View File

@@ -0,0 +1,8 @@
#include <gtest/gtest.h>
// Entry point for the unit-test binary: initializes GoogleTest (which
// consumes and strips its own command-line flags from argc/argv) and then
// runs every registered test, returning nonzero if any test failed.
int
main(int argc, char** argv)
{
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}