Add configurable NuDB block size feature (#5468)

As XRPL network demand grows and ledger sizes increase, the default 4K NuDB block size can become a performance bottleneck, especially on high-performance storage systems. Modern SSDs and enterprise storage often perform better with larger block sizes, but rippled previously had no way to configure this parameter. This change adds configurable NuDB block size support, allowing operators to tune storage performance to their hardware. It introduces a new `nudb_block_size` configuration parameter that accepts block sizes from 4K to 32K bytes, with validation and backward compatibility.

Specific changes are:
- Implements a `parseBlockSize()` function with validation.
- Adds a `nudb_block_size` configuration parameter (see the example configuration below).
- Supports block sizes from 4K to 32K (powers of 2).
- Adds comprehensive logging and error handling.
- Maintains backward compatibility with the 4K default.
- Adds unit tests for block size validation.
- Updates the configuration documentation with performance guidance.
- Marks the feature as experimental.
- Applies code formatting fixes.
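
As a usage sketch, an operator would set the parameter in the `[node_db]` stanza of the configuration file. The 8192 value here is purely illustrative; the stanza otherwise mirrors the sample configuration added in the diff below:

```
[node_db]
type=NuDB
path=/var/lib/rippled/db/nudb
# EXPERIMENTAL: must be a power of 2 between 4096 and 32768; defaults to 4096
nudb_block_size=8192
online_delete=512
advisory_delete=0
```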

Co-authored-by: Bart Thomee <11445373+bthomee@users.noreply.github.com>
Author: Valon Mamudi
Committed: 2025-10-21 02:51:44 +02:00 (via GitHub)
Parent: 83ee3788e1
Commit: a7792ebcae
4 changed files with 589 additions and 1 deletion

View File

@@ -975,6 +975,47 @@
# number of ledger records online. Must be greater
# than or equal to ledger_history.
#
# Optional keys for NuDB only:
#
# nudb_block_size EXPERIMENTAL: Block size in bytes for NuDB storage.
# Must be a power of 2 between 4096 and 32768. Default is 4096.
#
# This parameter controls the fundamental storage unit
# size for NuDB's internal data structures. The choice
# of block size can significantly impact performance
# depending on your storage hardware and filesystem:
#
# - 4096 bytes: Optimal for most standard SSDs and
# traditional filesystems (ext4, NTFS, HFS+).
# Provides good balance of performance and storage
# efficiency. Recommended for most deployments.
# Minimizes memory footprint and provides consistent
# low-latency access patterns across diverse hardware.
#
# - 8192-16384 bytes: May improve performance on
# high-end NVMe SSDs and copy-on-write filesystems
# like ZFS or Btrfs that benefit from larger block
# alignment. Can reduce metadata overhead for large
# databases. Offers better sequential throughput and
# reduced I/O operations at the cost of higher memory
# usage per operation.
#
# - 32768 bytes (32K): Maximum supported block size
# for high-performance scenarios with very fast
# storage. May increase memory usage and reduce
# efficiency for smaller databases. Best suited for
# enterprise environments with abundant RAM.
#
# Performance testing is recommended before deploying
# any non-default block size in production environments.
#
# Note: This setting cannot be changed after database
# creation without rebuilding the entire database.
# Choose carefully based on your hardware and expected
# database size.
#
# Example: nudb_block_size=4096
#
# These keys modify the behavior of online_delete, and thus are only
# relevant if online_delete is defined and non-zero:
#
@@ -1471,6 +1512,7 @@ secure_gateway = 127.0.0.1
[node_db]
type=NuDB
path=/var/lib/rippled/db/nudb
nudb_block_size=4096
online_delete=512
advisory_delete=0

View File

@@ -0,0 +1,478 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2012, 2013 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <test/nodestore/TestBase.h>
#include <test/unit_test/SuiteJournal.h>
#include <xrpld/nodestore/DummyScheduler.h>
#include <xrpld/nodestore/Manager.h>
#include <xrpl/basics/BasicConfig.h>
#include <xrpl/basics/ByteUtilities.h>
#include <xrpl/beast/utility/temp_dir.h>
#include <memory>
#include <sstream>
namespace ripple {
namespace NodeStore {
class NuDBFactory_test : public TestBase
{
private:
// Helper function to create a Section with specified parameters
Section
createSection(std::string const& path, std::string const& blockSize = "")
{
Section params;
params.set("type", "nudb");
params.set("path", path);
if (!blockSize.empty())
params.set("nudb_block_size", blockSize);
return params;
}
// Helper function to create a backend and test basic functionality
bool
testBackendFunctionality(
Section const& params,
std::size_t expectedBlocksize)
{
try
{
DummyScheduler scheduler;
test::SuiteJournal journal("NuDBFactory_test", *this);
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
if (!BEAST_EXPECT(backend))
return false;
if (!BEAST_EXPECT(backend->getBlockSize() == expectedBlocksize))
return false;
backend->open();
if (!BEAST_EXPECT(backend->isOpen()))
return false;
// Test basic store/fetch functionality
auto batch = createPredictableBatch(10, 12345);
storeBatch(*backend, batch);
Batch copy;
fetchCopyOfBatch(*backend, &copy, batch);
backend->close();
return areBatchesEqual(batch, copy);
}
catch (...)
{
return false;
}
}
// Helper function to test log messages
void
testLogMessage(
Section const& params,
beast::severities::Severity level,
std::string const& expectedMessage)
{
test::StreamSink sink(level);
beast::Journal journal(sink);
DummyScheduler scheduler;
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
std::string logOutput = sink.messages().str();
BEAST_EXPECT(logOutput.find(expectedMessage) != std::string::npos);
}
// Helper function to test power of two validation
void
testPowerOfTwoValidation(std::string const& size, bool shouldWork)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), size);
test::StreamSink sink(beast::severities::kWarning);
beast::Journal journal(sink);
DummyScheduler scheduler;
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
std::string logOutput = sink.messages().str();
bool hasWarning =
logOutput.find("Invalid nudb_block_size") != std::string::npos;
BEAST_EXPECT(hasWarning == !shouldWork);
}
public:
void
testDefaultBlockSize()
{
testcase("Default block size (no nudb_block_size specified)");
beast::temp_dir tempDir;
auto params = createSection(tempDir.path());
// Should work with default 4096 block size
BEAST_EXPECT(testBackendFunctionality(params, 4096));
}
void
testValidBlockSizes()
{
testcase("Valid block sizes");
std::vector<std::size_t> validSizes = {4096, 8192, 16384, 32768};
for (auto const& size : validSizes)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), to_string(size));
BEAST_EXPECT(testBackendFunctionality(params, size));
}
// Empty value is ignored by the config parser, so uses the
// default
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), "");
BEAST_EXPECT(testBackendFunctionality(params, 4096));
}
void
testInvalidBlockSizes()
{
testcase("Invalid block sizes");
std::vector<std::string> invalidSizes = {
"2048", // Too small
"1024", // Too small
"65536", // Too large
"131072", // Too large
"5000", // Not power of 2
"6000", // Not power of 2
"10000", // Not power of 2
"0", // Zero
"-1", // Negative
"abc", // Non-numeric
"4k", // Invalid format
"4096.5" // Decimal
};
for (auto const& size : invalidSizes)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), size);
// Fails
BEAST_EXPECT(!testBackendFunctionality(params, 4096));
}
// Test whitespace cases separately since lexical_cast may handle them
std::vector<std::string> whitespaceInvalidSizes = {
"4096 ", // Trailing space - might be handled by lexical_cast
" 4096" // Leading space - might be handled by lexical_cast
};
for (auto const& size : whitespaceInvalidSizes)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), size);
// Fails
BEAST_EXPECT(!testBackendFunctionality(params, 4096));
}
}
void
testLogMessages()
{
testcase("Log message verification");
// Test valid custom block size logging
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), "8192");
testLogMessage(
params,
beast::severities::kInfo,
"Using custom NuDB block size: 8192");
}
// Test invalid block size failure
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), "5000");
test::StreamSink sink(beast::severities::kWarning);
beast::Journal journal(sink);
DummyScheduler scheduler;
try
{
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
fail();
}
catch (std::exception const& e)
{
std::string logOutput{e.what()};
BEAST_EXPECT(
logOutput.find("Invalid nudb_block_size: 5000") !=
std::string::npos);
BEAST_EXPECT(
logOutput.find(
"Must be power of 2 between 4096 and 32768") !=
std::string::npos);
}
}
// Test non-numeric value failure
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), "invalid");
test::StreamSink sink(beast::severities::kWarning);
beast::Journal journal(sink);
DummyScheduler scheduler;
try
{
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
fail();
}
catch (std::exception const& e)
{
std::string logOutput{e.what()};
BEAST_EXPECT(
logOutput.find("Invalid nudb_block_size value: invalid") !=
std::string::npos);
}
}
}
void
testPowerOfTwoValidation()
{
testcase("Power of 2 validation logic");
// Test edge cases around valid range
std::vector<std::pair<std::string, bool>> testCases = {
{"4095", false}, // Just below minimum
{"4096", true}, // Minimum valid
{"4097", false}, // Just above minimum, not power of 2
{"8192", true}, // Valid power of 2
{"8193", false}, // Just above valid power of 2
{"16384", true}, // Valid power of 2
{"32768", true}, // Maximum valid
{"32769", false}, // Just above maximum
{"65536", false} // Power of 2 but too large
};
for (auto const& [size, shouldWork] : testCases)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), size);
// We test the validation logic by catching exceptions for invalid
// values
test::StreamSink sink(beast::severities::kWarning);
beast::Journal journal(sink);
DummyScheduler scheduler;
try
{
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
BEAST_EXPECT(shouldWork);
}
catch (std::exception const& e)
{
std::string logOutput{e.what()};
BEAST_EXPECT(
logOutput.find("Invalid nudb_block_size") !=
std::string::npos);
}
}
}
void
testBothConstructorVariants()
{
testcase("Both constructor variants work with custom block size");
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), "16384");
DummyScheduler scheduler;
test::SuiteJournal journal("NuDBFactory_test", *this);
// Test first constructor (without nudb::context)
{
auto backend1 = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
BEAST_EXPECT(backend1 != nullptr);
BEAST_EXPECT(testBackendFunctionality(params, 16384));
}
// Test second constructor (with nudb::context)
// Note: This would require access to nudb::context, which might not be
// easily testable without more complex setup. For now, we test that
// the factory can create backends with the first constructor.
}
void
testConfigurationParsing()
{
testcase("Configuration parsing edge cases");
// Test that whitespace is handled correctly
std::vector<std::string> validFormats = {
"8192" // Basic valid format
};
// Test whitespace handling separately since lexical_cast behavior may
// vary
std::vector<std::string> whitespaceFormats = {
" 8192", // Leading space - may or may not be handled by
// lexical_cast
"8192 " // Trailing space - may or may not be handled by
// lexical_cast
};
// Test basic valid format
for (auto const& format : validFormats)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), format);
test::StreamSink sink(beast::severities::kInfo);
beast::Journal journal(sink);
DummyScheduler scheduler;
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
// Should log success message for valid values
std::string logOutput = sink.messages().str();
bool hasSuccessMessage =
logOutput.find("Using custom NuDB block size") !=
std::string::npos;
BEAST_EXPECT(hasSuccessMessage);
}
// Test whitespace formats - these should work if lexical_cast handles
// them
for (auto const& format : whitespaceFormats)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), format);
// Use a lower threshold to capture both info and warning messages
test::StreamSink sink(beast::severities::kDebug);
beast::Journal journal(sink);
DummyScheduler scheduler;
try
{
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
fail();
}
catch (...)
{
// Fails
BEAST_EXPECT(!testBackendFunctionality(params, 8192));
}
}
}
void
testDataPersistence()
{
testcase("Data persistence with different block sizes");
std::vector<std::string> blockSizes = {
"4096", "8192", "16384", "32768"};
for (auto const& size : blockSizes)
{
beast::temp_dir tempDir;
auto params = createSection(tempDir.path(), size);
DummyScheduler scheduler;
test::SuiteJournal journal("NuDBFactory_test", *this);
// Create test data
auto batch = createPredictableBatch(50, 54321);
// Store data
{
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
backend->open();
storeBatch(*backend, batch);
backend->close();
}
// Retrieve data in new backend instance
{
auto backend = Manager::instance().make_Backend(
params, megabytes(4), scheduler, journal);
backend->open();
Batch copy;
fetchCopyOfBatch(*backend, &copy, batch);
BEAST_EXPECT(areBatchesEqual(batch, copy));
backend->close();
}
}
}
void
run() override
{
testDefaultBlockSize();
testValidBlockSizes();
testInvalidBlockSizes();
testLogMessages();
testPowerOfTwoValidation();
testBothConstructorVariants();
testConfigurationParsing();
testDataPersistence();
}
};
BEAST_DEFINE_TESTSUITE(NuDBFactory, ripple_core, ripple);
} // namespace NodeStore
} // namespace ripple

View File

@@ -53,6 +53,14 @@ public:
virtual std::string
getName() = 0;
/** Get the block size for backends that support it
*/
virtual std::optional<std::size_t>
getBlockSize() const
{
return std::nullopt;
}
/** Open the backend.
@param createIfMissing Create the database files if necessary.
This allows the caller to catch exceptions.

View File

@@ -24,6 +24,7 @@
#include <xrpld/nodestore/detail/codec.h>
#include <xrpl/basics/contract.h>
#include <xrpl/beast/core/LexicalCast.h>
#include <xrpl/beast/utility/instrumentation.h>
#include <boost/filesystem.hpp>
@@ -52,6 +53,7 @@ public:
size_t const keyBytes_;
std::size_t const burstSize_;
std::string const name_;
std::size_t const blockSize_;
nudb::store db_;
std::atomic<bool> deletePath_;
Scheduler& scheduler_;
@@ -66,6 +68,7 @@ public:
, keyBytes_(keyBytes)
, burstSize_(burstSize)
, name_(get(keyValues, "path"))
, blockSize_(parseBlockSize(name_, keyValues, journal))
, deletePath_(false)
, scheduler_(scheduler)
{
@@ -85,6 +88,7 @@ public:
, keyBytes_(keyBytes)
, burstSize_(burstSize)
, name_(get(keyValues, "path"))
, blockSize_(parseBlockSize(name_, keyValues, journal))
, db_(context)
, deletePath_(false)
, scheduler_(scheduler)
@@ -114,6 +118,12 @@ public:
return name_;
}
std::optional<std::size_t>
getBlockSize() const override
{
return blockSize_;
}
void
open(bool createIfMissing, uint64_t appType, uint64_t uid, uint64_t salt)
override
@@ -145,7 +155,7 @@ public:
uid,
salt,
keyBytes_,
nudb::block_size(kp),
blockSize_,
0.50,
ec);
if (ec == nudb::errc::file_exists)
@@ -361,6 +371,56 @@ public:
{
return 3;
}
private:
static std::size_t
parseBlockSize(
std::string const& name,
Section const& keyValues,
beast::Journal journal)
{
using namespace boost::filesystem;
auto const folder = path(name);
auto const kp = (folder / "nudb.key").string();
std::size_t const defaultSize =
nudb::block_size(kp); // Default 4K from NuDB
std::size_t blockSize = defaultSize;
std::string blockSizeStr;
if (!get_if_exists(keyValues, "nudb_block_size", blockSizeStr))
{
return blockSize; // Early return with default
}
try
{
std::size_t const parsedBlockSize =
beast::lexicalCastThrow<std::size_t>(blockSizeStr);
// Validate: must be power of 2 between 4K and 32K
if (parsedBlockSize < 4096 || parsedBlockSize > 32768 ||
(parsedBlockSize & (parsedBlockSize - 1)) != 0)
{
std::stringstream s;
s << "Invalid nudb_block_size: " << parsedBlockSize
<< ". Must be power of 2 between 4096 and 32768.";
Throw<std::runtime_error>(s.str());
}
JLOG(journal.info())
<< "Using custom NuDB block size: " << parsedBlockSize
<< " bytes";
return parsedBlockSize;
}
catch (std::exception const& e)
{
std::stringstream s;
s << "Invalid nudb_block_size value: " << blockSizeStr
<< ". Error: " << e.what();
Throw<std::runtime_error>(s.str());
}
}
};
//------------------------------------------------------------------------------
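
The range check in `parseBlockSize()` uses the standard bit trick that a power of two has exactly one bit set, so `n & (n - 1)` is zero only for powers of two (and for zero, which the lower bound already rejects). A self-contained sketch of the same predicate, with a hypothetical helper name:

```cpp
#include <cassert>
#include <cstddef>

// Mirrors the validation in parseBlockSize(): accept only powers of two
// in the inclusive range [4096, 32768].
static bool
isValidNuDBBlockSize(std::size_t n)
{
    return n >= 4096 && n <= 32768 && (n & (n - 1)) == 0;
}

int
main()
{
    assert(isValidNuDBBlockSize(4096));    // minimum allowed
    assert(isValidNuDBBlockSize(32768));   // maximum allowed
    assert(!isValidNuDBBlockSize(5000));   // not a power of two
    assert(!isValidNuDBBlockSize(65536));  // power of two, but above the range
}
```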