Defensive checks

Add tests, add gracefull stopping
Make pending writes size_t to avoid casting everywhere
2026-03-22 20:52:35 +00:00 · 2026-03-21 20:06:55 -04:00 · 2026-03-21 18:57:03 -04:00 · 2026-03-21 10:24:24 -04:00 · 2026-03-21 10:21:24 -04:00 · 2026-03-20 18:03:17 -04:00
9 changed files with 852 additions and 62 deletions
--- a/.github/scripts/levelization/results/ordering.txt
+++ b/.github/scripts/levelization/results/ordering.txt
@@ -177,6 +177,7 @@ test.unit_test > xrpl.protocol
 tests.libxrpl > xrpl.basics
 tests.libxrpl > xrpl.json
 tests.libxrpl > xrpl.net
 tests.libxrpl > xrpl.nodestore
 tests.libxrpl > xrpl.protocol
 tests.libxrpl > xrpl.protocol_autogen
 xrpl.conditions > xrpl.basics
--- a/include/xrpl/nodestore/Backend.h
+++ b/include/xrpl/nodestore/Backend.h
@@ -138,6 +138,22 @@ public:
    /** Returns the number of file descriptors the backend expects to need. */
    virtual int
    fdRequired() const = 0;
    /** The number of hardware threads to use for compression of a batch. */
    static unsigned int const numHardwareThreads;
    /** Calculate parallelization parameters for a batch of items.
        Determines the number of threads and items per thread needed for parallel batch processing.
        @param batchSize Number of items to process
        @param maxThreadCount Maximum number of threads to use.
        @return A pair of (numThreads, numItems) where numThreads is the exact number of threads to
                use, and numItems is the number of items per thread. The last thread may process
                fewer items.
    */
    static std::pair<unsigned int, unsigned int>
    calculateBatchParallelism(unsigned int batchSize, unsigned int maxThreadCount);
 };
 }  // namespace NodeStore
--- a/src/libxrpl/nodestore/backend/Backend.cpp
+++ b/src/libxrpl/nodestore/backend/Backend.cpp
@@ -0,0 +1,68 @@
 #include <xrpl/nodestore/Backend.h>
 #include <algorithm>
 #include <thread>
 namespace xrpl {
 namespace NodeStore {
 // Initialize the static constant for hardware thread count. The `hardware_concurrency` function can
 // return 0 on some platforms, in which case we default to 1. We limit the total number of threads
 // to 8 to avoid contention.
 unsigned int const Backend::numHardwareThreads = []() {
    auto const hw = std::thread::hardware_concurrency();
    return std::min(std::max(hw, 1u), 8u);
 }();
 std::pair<unsigned int, unsigned int>
 Backend::calculateBatchParallelism(unsigned int batchSize, unsigned int maxThreadCount)
 {
    XRPL_ASSERT(
        maxThreadCount > 0,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : maxThreadCount > 0");
    if (maxThreadCount == 0)
    {
        // LCOV_EXCL_START
        UNREACHABLE("xrpl::NodeStore::Backend::calculateBatchParallelism : maxThreadCount == 0");
        return {1, batchSize};
        // LCOV_EXCL_STOP
    }
    if (batchSize == 0)
    {
        return {0, 0};
    }
    // Estimate the number of threads using ceiling division: aim for at least 4 items per thread,
    // but don't exceed the number of available threads.
    auto const initialThreads = std::min((batchSize + 3u) / 4u, maxThreadCount);
    // Calculate number of items per thread.
    auto const numItems = (batchSize + initialThreads - 1u) / initialThreads;
    // Calculate the actual number of threads needed. After rounding up numItems, we may need fewer
    // threads than initially estimated.
    auto const actualThreads = (batchSize + numItems - 1u) / numItems;
    XRPL_ASSERT(
        numItems <= batchSize,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : numItems <= batchSize");
    XRPL_ASSERT(
        actualThreads <= batchSize,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : actualThreads <= batchSize");
    XRPL_ASSERT(
        actualThreads <= maxThreadCount,
        "xrpl::NodeStore::Backend::calculateBatchParallelism : actualThreads <= hwThreadCount");
    if (numItems > batchSize || actualThreads > batchSize || actualThreads > maxThreadCount)
    {
        // LCOV_EXCL_START
        UNREACHABLE("xrpl::NodeStore::Backend::calculateBatchParallelism : sanity check failed");
        return {1, batchSize};
        // LCOV_EXCL_STOP
    }
    return {actualThreads, numItems};
 }
 }  // namespace NodeStore
 }  // namespace xrpl
--- a/src/libxrpl/nodestore/backend/NuDBFactory.cpp
+++ b/src/libxrpl/nodestore/backend/NuDBFactory.cpp
@@ -7,15 +7,22 @@
 #include <xrpl/nodestore/detail/EncodedBlob.h>
 #include <xrpl/nodestore/detail/codec.h>
 #include <boost/asio/post.hpp>
 #include <boost/asio/thread_pool.hpp>
 #include <boost/filesystem.hpp>
 #include <nudb/nudb.hpp>
 #include <atomic>
 #include <chrono>
 #include <cstdint>
 #include <cstdio>
 #include <exception>
 #include <latch>
 #include <memory>
 #include <mutex>
 #include <thread>
 #include <vector>
 namespace xrpl {
 namespace NodeStore {
@@ -23,21 +30,6 @@ namespace NodeStore {
 class NuDBBackend : public Backend
 {
 public:
    // "appnum" is an application-defined constant stored in the header of a
    // NuDB database. We used it to identify shard databases before that code
    // was removed. For now, its only use is a sanity check that the database
    // was created by xrpld.
    static constexpr std::uint64_t appnum = 1;
    beast::Journal const j_;
    size_t const keyBytes_;
    std::size_t const burstSize_;
    std::string const name_;
    std::size_t const blockSize_;
    nudb::store db_;
    std::atomic<bool> deletePath_;
    Scheduler& scheduler_;
    NuDBBackend(
        size_t keyBytes,
        Section const& keyValues,
@@ -51,6 +43,7 @@ public:
        , blockSize_(parseBlockSize(name_, keyValues, journal))
        , deletePath_(false)
        , scheduler_(scheduler)
        , threadPool_(numHardwareThreads)
    {
        if (name_.empty())
            Throw<std::runtime_error>("nodestore: Missing path in NuDB backend");
@@ -71,6 +64,7 @@ public:
        , db_(context)
        , deletePath_(false)
        , scheduler_(scheduler)
        , threadPool_(numHardwareThreads)
    {
        if (name_.empty())
            Throw<std::runtime_error>("nodestore: Missing path in NuDB backend");
@@ -80,7 +74,32 @@ public:
    {
        try
        {
-            // close can throw and we don't want the destructor to throw.
+            // Set shutdown flag to prevent new batch operations from starting. This must happen
            // before stop() is called to ensure fetchBatch/storeBatch check the flag before posting
            // any new tasks.
            shutdown_.store(true, std::memory_order_release);
            // Wait for all active operations to complete.
            while (pendingReads_.load(std::memory_order_acquire) > 0 ||
                   pendingWrites_.load(std::memory_order_acquire) > 0)
            {
                std::this_thread::yield();
            }
            // Signal the thread pool to stop accepting new work. This ensures no new tasks will be
            // posted after this point.
            threadPool_.stop();
            // Wait for all currently executing thread pool tasks to complete. This prevents worker
            // threads from accessing the database after close().
            threadPool_.join();
            // Verify all writes have completed.
            XRPL_ASSERT(
                pendingWrites_.load() == 0, "xrpl::NuDBBackend::~NuDBBackend : pendingWrites == 0");
            // Close the database. At this point, all threads have stopped and no pending reads and
            // writes remain, so it's safe to close the database.
            close();
        }
        catch (nudb::system_error const&)  // NOLINT(bugprone-empty-catch)
@@ -109,9 +128,7 @@ public:
        if (db_.is_open())
        {
            // LCOV_EXCL_START
-            UNREACHABLE(
+            UNREACHABLE("xrpl::NodeStore::NuDBBackend::open : database is already open");
                "xrpl::NodeStore::NuDBBackend::open : database is already "
                "open");
            JLOG(j_.error()) << "database is already open";
            return;
            // LCOV_EXCL_STOP
@@ -127,16 +144,24 @@ public:
            nudb::create<nudb::xxhasher>(
                dp, kp, lp, appType, uid, salt, keyBytes_, blockSize_, 0.50, ec);
            if (ec == nudb::errc::file_exists)
            {
                ec = {};
            }
            if (ec)
            {
                Throw<nudb::system_error>(ec);
            }
        }
        db_.open(dp, kp, lp, ec);
        if (ec)
        {
            Throw<nudb::system_error>(ec);
        }
        if (db_.appnum() != appnum)
        {
            Throw<std::runtime_error>("nodestore: unknown appnum");
        }
        db_.set_burst(burstSize_);
    }
@@ -181,9 +206,22 @@ public:
    Status
    fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pno) override
    {
        // Increment pending reads counter on entry, decrement on exit. This ensures the destructor
        // waits for this operation to complete.
        ++pendingReads_;
        auto guard = [this](void*) { --pendingReads_; };
        std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
        // Check if we're shutting down. If so, return immediately instead of doing any work.
        if (shutdown_.load(std::memory_order_acquire))
        {
            return backendError;
        }
        Status status = ok;
        pno->reset();
        nudb::error_code ec;
        db_.fetch(
            hash.data(),
            [&hash, pno, &status](void const* data, std::size_t size) {
@@ -199,30 +237,119 @@ public:
                status = ok;
            },
            ec);
        if (ec == nudb::error::key_not_found)
        {
            return notFound;
        }
        if (ec)
        {
            Throw<nudb::system_error>(ec);
        }
        return status;
    }
    std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
    fetchBatch(std::vector<uint256> const& hashes) override
    {
-        std::vector<std::shared_ptr<NodeObject>> results;
+        if (hashes.empty())
        results.reserve(hashes.size());
        for (auto const& h : hashes)
        {
-            std::shared_ptr<NodeObject> nObj;
+            return {{}, ok};
-            Status status = fetch(h, &nObj);
+        }
-            if (status != ok)
+
        // Increment pending reads counter on entry, decrement on exit. This ensures the destructor
        // waits for this operation to complete.
        pendingReads_ += hashes.size();
        auto guard = [this, &hashes](void*) { pendingReads_ -= hashes.size(); };
        std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
        // Check if we're shutting down. If so, return immediately instead of doing any work.
        if (shutdown_.load(std::memory_order_acquire))
        {
            return {{}, backendError};
        }
        std::vector<std::shared_ptr<NodeObject>> results(hashes.size());
        // Calculate parallelization parameters for the batch.
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(hashes.size(), numHardwareThreads);
        // If we need only one thread, just do it sequentially. Although it should be impossible to
        // get 0 threads here, handle it gracefully just in case.
        if (numThreads <= 1u)
        {
            for (size_t i = 0; i < hashes.size(); ++i)
            {
-                results.push_back({});
+                std::shared_ptr<NodeObject> nObj;
                if (fetch(hashes[i], &nObj) == ok)
                {
                    results[i] = nObj;
                }
            }
-            else
+            return {results, ok};
        }
        // Use a latch to synchronize task completion.
        std::latch taskCompletion(numThreads);
        // Track exceptions from worker threads.
        std::exception_ptr eptr;
        std::mutex emutex;
        // Submit fetch tasks to the thread pool.
        for (auto t = 0u; t < numThreads; ++t)
        {
            auto const startIdx = t * numItems;
            XRPL_ASSERT(
                startIdx < hashes.size(),
                "xrpl::NuDBFactory::fetchBatch : startIdx < hashes.size()");
            if (startIdx >= hashes.size())
            {
-                results.push_back(nObj);
+                // This should never happen, but is kept as a safety check.
                taskCompletion.count_down();
                continue;
            }
            auto const endIdx = std::min<std::size_t>(startIdx + numItems, hashes.size());
            auto task =
                [this, &hashes, &results, &taskCompletion, &eptr, &emutex, startIdx, endIdx]() {
                    try
                    {
                        // Fetch the items assigned to this task.
                        for (size_t i = startIdx; i < endIdx; ++i)
                        {
                            std::shared_ptr<NodeObject> nObj;
                            if (fetch(hashes[i], &nObj) == ok)
                            {
                                results[i] = nObj;
                            }
                        }
                    }
                    catch (...)
                    {
                        // Store the first exception that occurs. Ensures count_down() is always
                        // called to prevent deadlock.
                        std::lock_guard<std::mutex> lock(emutex);
                        if (!eptr)
                        {
                            eptr = std::current_exception();
                        }
                    }
                    // Signal task completion.
                    taskCompletion.count_down();
                };
            boost::asio::post(threadPool_, std::move(task));
        }
        // Wait for all fetch tasks to complete.
        taskCompletion.wait();
        // Rethrow the first exception if one occurred.
        if (eptr)
        {
            std::rethrow_exception(eptr);
        }
        return {results, ok};
@@ -232,21 +359,39 @@ public:
    do_insert(std::shared_ptr<NodeObject> const& no)
    {
        EncodedBlob e(no);
-        nudb::error_code ec;
+
        nudb::detail::buffer bf;
        auto const result = nodeobject_compress(e.getData(), e.getSize(), bf);
        nudb::error_code ec;
        db_.insert(e.getKey(), result.first, result.second, ec);
        if (ec && ec != nudb::error::key_exists)
        {
            Throw<nudb::system_error>(ec);
        }
    }
    void
    store(std::shared_ptr<NodeObject> const& no) override
    {
        // Increment pending writes counter on entry, decrement on exit. This ensures the destructor
        // waits for this operation to complete.
        ++pendingWrites_;
        auto guard = [this](void*) { --pendingWrites_; };
        std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
        // Check if we're shutting down. If so, return immediately instead of doing any work.
        if (shutdown_.load(std::memory_order_acquire))
        {
            return;
        }
        BatchWriteReport report{};
        report.writeCount = 1;
        auto const start = std::chrono::steady_clock::now();
        do_insert(no);
        report.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - start);
        scheduler_.onBatchWrite(report);
@@ -255,11 +400,127 @@ public:
    void
    storeBatch(Batch const& batch) override
    {
        if (batch.empty())
        {
            return;
        }
        // Increment pending writes counter on entry, decrement on exit. This ensures the destructor
        // waits for this operation to complete.
        pendingWrites_ += batch.size();
        auto guard = [this, &batch](void*) { pendingWrites_ -= batch.size(); };
        std::unique_ptr<void, decltype(guard)> opGuard(reinterpret_cast<void*>(1), guard);
        // Check if we're shutting down. If so, return immediately instead of doing any work.
        if (shutdown_.load(std::memory_order_acquire))
        {
            return;
        }
        BatchWriteReport report{};
        report.writeCount = batch.size();
        auto const start = std::chrono::steady_clock::now();
-        for (auto const& e : batch)
+
-            do_insert(e);
+        // Calculate parallelization parameters for the batch.
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batch.size(), numHardwareThreads);
        // If we need only one thread, just do it sequentially. Although it should be impossible to
        // get 0 threads here, handle it gracefully just in case.
        if (numThreads <= 1u)
        {
            for (auto const& e : batch)
            {
                do_insert(e);
            }
            report.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
                std::chrono::steady_clock::now() - start);
            scheduler_.onBatchWrite(report);
            return;
        }
        // Helper struct that stores actual item data, not pointers, to avoid dangling references
        // after EncodedBlob and buffer go out of scope in the thread.
        struct CompressedData
        {
            std::vector<std::uint8_t> key;
            std::vector<std::uint8_t> data;
            std::exception_ptr eptr;
        };
        std::vector<CompressedData> compressed(batch.size());
        // Use a latch to synchronize task completion.
        std::latch taskCompletion(numThreads);
        // Submit compression tasks to the thread pool.
        for (auto t = 0u; t < numThreads; ++t)
        {
            auto const startIdx = t * numItems;
            XRPL_ASSERT(
                startIdx < batch.size(), "xrpl::NuDBFactory::storeBatch : startIdx < batch.size()");
            if (startIdx >= batch.size())
            {
                // This should never happen, but is kept as a safety check.
                taskCompletion.count_down();
                continue;
            }
            auto const endIdx = std::min<std::size_t>(startIdx + numItems, batch.size());
            auto task =
                [&batch, &compressed, &taskCompletion, startIdx, endIdx, keyBytes = keyBytes_]() {
                    // Compress the items assigned to this task.
                    for (size_t i = startIdx; i < endIdx; ++i)
                    {
                        auto& item = compressed[i];
                        try
                        {
                            EncodedBlob e(batch[i]);
                            // Copy the key data to avoid dangling pointer.
                            auto const* keyPtr = static_cast<std::uint8_t const*>(e.getKey());
                            item.key.assign(keyPtr, keyPtr + keyBytes);
                            // Compress and copy the data to avoid dangling pointer.
                            nudb::detail::buffer bf;
                            auto const comp = nodeobject_compress(e.getData(), e.getSize(), bf);
                            auto const* dataPtr = static_cast<std::uint8_t const*>(comp.first);
                            item.data.assign(dataPtr, dataPtr + comp.second);
                        }
                        catch (...)
                        {
                            // Store the exception so it can be rethrown in the sequential phase
                            // below.
                            item.eptr = std::current_exception();
                        }
                    }
                    // Signal task completion.
                    taskCompletion.count_down();
                };
            boost::asio::post(threadPool_, std::move(task));
        }
        // Wait for all compression tasks to complete.
        taskCompletion.wait();
        // Insert the compressed data sequentially, since NuDB is designed as an append-only data
        // store that limits concurrent writes.
        for (auto const& item : compressed)
        {
            if (item.eptr)
            {
                std::rethrow_exception(item.eptr);
            }
            nudb::error_code ec;
            db_.insert(item.key.data(), item.data.data(), item.data.size(), ec);
            if (ec && ec != nudb::error::key_exists)
            {
                Throw<nudb::system_error>(ec);
            }
        }
        report.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - start);
        scheduler_.onBatchWrite(report);
@@ -276,7 +537,7 @@ public:
        auto const dp = db_.dat_path();
        auto const kp = db_.key_path();
        auto const lp = db_.log_path();
-        // auto const appnum = db_.appnum();
+
        nudb::error_code ec;
        db_.close(ec);
        if (ec)
@@ -310,7 +571,7 @@ public:
    int
    getWriteLoad() override
    {
-        return 0;
+        return pendingWrites_.load();
    }
    void
@@ -385,6 +646,28 @@ private:
            Throw<std::runtime_error>(s.str());
        }
    }
    // "appnum" is an application-defined constant stored in the header of a
    // NuDB database. We used it to identify shard databases before that code
    // was removed. For now, its only use is a sanity check that the database
    // was created by xrpld.
    static constexpr std::uint64_t appnum = 1;
    beast::Journal const j_;
    size_t const keyBytes_;
    std::size_t const burstSize_;
    std::string const name_;
    std::size_t const blockSize_;
    nudb::store db_;
    std::atomic<bool> deletePath_;
    Scheduler& scheduler_;
    std::atomic<size_t> pendingReads_{
        0};  // Declare before threadPool_ to ensure it's destroyed after.
    std::atomic<size_t> pendingWrites_{
        0};  // Declare before threadPool_ to ensure it's destroyed after.
    std::atomic<bool> shutdown_{
        false};  // Declare before threadPool_ to ensure it's destroyed after.
    boost::asio::thread_pool threadPool_;  // Declare after db_ to ensure it's destroyed before.
 };
 //------------------------------------------------------------------------------
--- a/src/libxrpl/nodestore/backend/RocksDBFactory.cpp
+++ b/src/libxrpl/nodestore/backend/RocksDBFactory.cpp
@@ -13,6 +13,7 @@
 #include <atomic>
 #include <memory>
 #include <thread>
 namespace xrpl {
 namespace NodeStore {
@@ -185,6 +186,41 @@ public:
            }
        }
        // Enable pipelined writes for better write concurrency.
        m_options.enable_pipelined_write = true;
        // Set background job parallelism for better compaction/flush performance to the number of
        // hardware threads, unless the value is explicitly provided in the config. The default is
        // 2 (see include/rocksdb/options.h in the Conan dependency directory), so don't use fewer
        // than that if no value is explicitly provided.
        if (keyValues.exists("max_background_jobs"))
        {
            m_options.max_background_jobs = get<unsigned int>(keyValues, "max_background_jobs");
        }
        else if (auto v = numHardwareThreads; v > 2)
        {
            m_options.max_background_jobs = v;
        }
        // Set subcompactions for parallel compaction within a job to the number of hardware
        // threads, unless the value is explicitly provided in the config. The default is 1 (see
        // include/rocksdb/options.h in the Conan dependency directory), so don't use fewer
        // than that if no value is explicitly provided.
        if (keyValues.exists("max_subcompactions"))
        {
            m_options.max_subcompactions = get<unsigned int>(keyValues, "max_subcompactions");
        }
        else if (auto v = numHardwareThreads / 2; v > 1)
        {
            m_options.max_subcompactions = v;
        }
        // Enable direct I/O by default unless explicitly disabled in the config. This bypasses the
        // OS page cache for better predictable performance on SSDs.
        m_options.use_direct_reads = get<bool>(keyValues, "use_direct_io", true);
        m_options.use_direct_io_for_flush_and_compaction =
            get<bool>(keyValues, "use_direct_io", true);
        std::string s1, s2;
        rocksdb::GetStringFromDBOptions(&s1, m_options, "; ");
        rocksdb::GetStringFromColumnFamilyOptions(&s2, m_options, "; ");
@@ -259,23 +295,19 @@ public:
        rocksdb::ReadOptions const options;
        rocksdb::Slice const slice(std::bit_cast<char const*>(hash.data()), m_keyBytes);
        std::string string;
        rocksdb::Status getStatus = m_db->Get(options, slice, &string);
        if (getStatus.ok())
        {
            DecodedBlob decoded(hash.data(), string.data(), string.size());
            if (decoded.wasOk())
            {
                *pObject = decoded.createObject();
            }
            else
            {
-                // Decoding failed, probably corrupted!
+                // Decoding failed, probably corrupted.
                //
                status = dataCorrupt;
            }
        }
@@ -292,7 +324,6 @@ public:
            else
            {
                status = Status(customCode + unsafe_cast<int>(getStatus.code()));
                JLOG(m_journal.error()) << getStatus.ToString();
            }
        }
@@ -303,19 +334,44 @@ public:
    std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
    fetchBatch(std::vector<uint256> const& hashes) override
    {
-        std::vector<std::shared_ptr<NodeObject>> results;
+        XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::fetchBatch : non-null database");
-        results.reserve(hashes.size());
+
        if (hashes.empty())
        {
            return {{}, ok};
        }
        // Use MultiGet for parallel reads to allow RocksDB to fetch multiple keys concurrently,
        // significantly improving throughput compared to sequential fetch() calls.
        std::vector<rocksdb::Slice> keys;
        keys.reserve(hashes.size());
        for (auto const& h : hashes)
        {
-            std::shared_ptr<NodeObject> nObj;
+            keys.emplace_back(std::bit_cast<char const*>(h.data()), m_keyBytes);
-            Status status = fetch(h, &nObj);
+        }
-            if (status != ok)
+
        rocksdb::ReadOptions options;
        options.async_io = true;  // Enable for better concurrency on supported platforms.
        std::vector<std::string> values(hashes.size());
        auto statuses = m_db->MultiGet(options, keys, &values);
        std::vector<std::shared_ptr<NodeObject>> results(hashes.size());
        for (size_t i = 0; i < hashes.size(); ++i)
        {
            if (statuses[i].ok())
            {
-                results.push_back({});
+                DecodedBlob decoded(hashes[i].data(), values[i].data(), values[i].size());
                if (decoded.wasOk())
                {
                    results[i] = decoded.createObject();
                }
            }
-            else
+            else if (!statuses[i].IsNotFound())
            {
-                results.push_back(nObj);
+                // Log other errors but continue processing.
                JLOG(m_journal.warn()) << "fetchBatch: MultiGet error for key "
                                       << keys[i].ToString() << ": " << statuses[i].ToString();
            }
        }
@@ -331,25 +387,45 @@ public:
    void
    storeBatch(Batch const& batch) override
    {
-        XRPL_ASSERT(
+        XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::storeBatch : non-null database");
            m_db,
            "xrpl::NodeStore::RocksDBBackend::storeBatch : non-null "
            "database");
        rocksdb::WriteBatch wb;
        if (batch.empty())
        {
            return;
        }
        rocksdb::WriteBatch wb;
        for (auto const& e : batch)
        {
            EncodedBlob encoded(e);
            wb.Put(
                rocksdb::Slice(std::bit_cast<char const*>(encoded.getKey()), m_keyBytes),
                rocksdb::Slice(std::bit_cast<char const*>(encoded.getData()), encoded.getSize()));
        }
-        rocksdb::WriteOptions const options;
+        // Configure WriteOptions for high throughput.
        // Note: no_slowdown is intentionally NOT set here. When set to true, RocksDB returns an
        //       error instead of stalling when write buffers are full, which could cause write
        //       failures during high load. We prefer to accept brief stalls over dropped writes.
        rocksdb::WriteOptions options;
        // Setting `sync = false` improves write throughput significantly by allowing the OS to
        // batch fsync operations, rather than forcing immediate disk synchronization on every
        // write. The Write-Ahead Log (WAL) is still written and flushed, so database consistency is
        // maintained across clean restarts and crashes.
        //
        // Note: On hard shutdown up to a few seconds of recent writes (since the last OS-initiated
        //       flush) may be lost from this node. However, since ledger data is replicated across
        //       the network, lost writes can be re-synced from peers during startup.
        options.sync = false;
        // Keep WAL enabled for crash recovery consistency.
        options.disableWAL = false;
        // Ensure RocksDB will not aggressive throttle the writes.
        options.low_pri = false;
        auto ret = m_db->Write(options, &wb);
        if (!ret.ok())
            Throw<std::runtime_error>("storeBatch failed: " + ret.ToString());
    }
--- a/src/test/nodestore/Database_test.cpp
+++ b/src/test/nodestore/Database_test.cpp
@@ -520,6 +520,13 @@ public:
        srcParams.set("type", srcBackendType);
        srcParams.set("path", node_db.path());
        beast::temp_dir dest_db;
        Section destParams;
        destParams.set("type", destBackendType);
        destParams.set("path", dest_db.path());
        testcase("import into '" + destBackendType + "' from '" + srcBackendType + "'");
        // Create a batch
        auto batch = createPredictableBatch(numObjectsToTest, seedValue);
@@ -538,16 +545,9 @@ public:
                Manager::instance().make_Database(megabytes(4), scheduler, 2, srcParams, journal_);
            // Set up the destination database
            beast::temp_dir dest_db;
            Section destParams;
            destParams.set("type", destBackendType);
            destParams.set("path", dest_db.path());
            std::unique_ptr<Database> dest =
                Manager::instance().make_Database(megabytes(4), scheduler, 2, destParams, journal_);
            testcase("import into '" + destBackendType + "' from '" + srcBackendType + "'");
            // Do the import
            dest->importDatabase(*src);
--- a/src/tests/libxrpl/CMakeLists.txt
+++ b/src/tests/libxrpl/CMakeLists.txt
@@ -53,3 +53,7 @@ if(NOT WIN32)
    target_link_libraries(xrpl.test.net PRIVATE xrpl.imports.test)
    add_dependencies(xrpl.tests xrpl.test.net)
 endif()
 xrpl_add_test(nodestore)
 target_link_libraries(xrpl.test.nodestore PRIVATE xrpl.imports.test)
 add_dependencies(xrpl.tests xrpl.test.nodestore)
--- a/src/tests/libxrpl/nodestore/BatchParallelism.cpp
+++ b/src/tests/libxrpl/nodestore/BatchParallelism.cpp
@@ -0,0 +1,334 @@
 #include <xrpl/nodestore/Backend.h>
 #include <gtest/gtest.h>
 #include <algorithm>
 #include <vector>
 using namespace xrpl;
 using namespace xrpl::NodeStore;
 // Helper function to convert the pair result into ranges for testing.
 std::vector<std::pair<unsigned int, unsigned int>>
 calculateRanges(unsigned int batchSize, unsigned int maxThreadCount)
 {
    auto const [numThreads, numItems] =
        Backend::calculateBatchParallelism(batchSize, maxThreadCount);
    std::vector<std::pair<unsigned int, unsigned int>> ranges;
    ranges.reserve(numThreads);
    for (unsigned int t = 0; t < numThreads; ++t)
    {
        auto const startIdx = t * numItems;
        auto const endIdx = std::min(startIdx + numItems, batchSize);
        ranges.emplace_back(startIdx, endIdx);
    }
    return ranges;
 }
 TEST(BatchParallelism, EmptyBatch)
 {
    // Empty batch should return 0 threads.
    {
        auto const batchSize = 0u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 0u);
        EXPECT_EQ(numItems, 0u);
        // Verify ranges calculation.
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        EXPECT_EQ(ranges.size(), numThreads);
    }
 }
 TEST(BatchParallelism, SmallBatches)
 {
    // Batch size 1 should use 1 thread.
    {
        auto const batchSize = 1u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 1u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 1u);
    }
    // Batch size 2 should use 1 thread.
    {
        auto const batchSize = 2u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 2u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 2u);
    }
    // Batch size 3 should use 1 thread.
    {
        auto const batchSize = 3u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 3u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 3u);
    }
    // Batch size 4 should use 1 thread (exactly 4 items).
    {
        auto const batchSize = 4u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 4u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 4u);
    }
 }
 TEST(BatchParallelism, MediumBatches)
 {
    // Batch size 5 should use 2 threads.
    {
        auto const batchSize = 5u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 2u);  // ceil(5/4) = 2
        EXPECT_EQ(numItems, 3u);    // ceil(5/2) = 3
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 3u);
        EXPECT_EQ(ranges[1].first, 3u);
        EXPECT_EQ(ranges[1].second, 5u);
    }
    // Batch size 8 should use 2 threads.
    {
        auto const batchSize = 8u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 2u);
        EXPECT_EQ(numItems, 4u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
    }
    // Batch size 15 should use 4 threads (ceil(15/4) = 4).
    {
        auto const batchSize = 15u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 4u);
        EXPECT_EQ(numItems, 4u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads - 1; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
        EXPECT_EQ(ranges[numThreads - 1].first, (numThreads - 1) * numItems);
        EXPECT_EQ(ranges[numThreads - 1].second, batchSize);  // Last range gets remaining items.
    }
    // Batch size 22 should use 6 threads.
    {
        auto const batchSize = 22u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 6u);  // ceil(22/4) = 6
        EXPECT_EQ(numItems, 4u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads - 1; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
        EXPECT_EQ(ranges[numThreads - 1].first, (numThreads - 1) * numItems);
        EXPECT_EQ(ranges[numThreads - 1].second, batchSize);
    }
    // Batch size 32 should use 8 threads.
    {
        auto const batchSize = 32u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 8u);
        EXPECT_EQ(numItems, 4u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
    }
 }
 TEST(BatchParallelism, LargeBatches)
 {
    // Batch size 100 should use 8 threads (max limit).
    {
        auto const batchSize = 100u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 8u);
        EXPECT_EQ(numItems, 13u);  // ceil(100/8) = 13
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads - 1; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
        EXPECT_EQ(ranges[numThreads - 1].first, (numThreads - 1) * numItems);
        EXPECT_EQ(ranges[numThreads - 1].second, batchSize);
    }
    // Batch size 1000 with 8 hw threads.
    {
        auto const batchSize = 1000u;
        auto const maxThreadCount = 8u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 8u);
        EXPECT_EQ(numItems, 125u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
    }
 }
 TEST(BatchParallelism, HardwareThreadLimits)
 {
    // With only 1 thread available.
    {
        auto const batchSize = 100u;
        auto const maxThreadCount = 1u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 1u);
        EXPECT_EQ(numItems, 100u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        EXPECT_EQ(ranges[0].first, 0u);
        EXPECT_EQ(ranges[0].second, 100u);
    }
    // With 2 threads.
    {
        auto const batchSize = 50u;
        auto const maxThreadCount = 2u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 2u);
        EXPECT_EQ(numItems, 25u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
    }
    // With 10 threads.
    {
        auto const batchSize = 50u;
        auto const maxThreadCount = 12u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 10u);  // ceil(50/4) = 13, but numThreads = 10.
        EXPECT_EQ(numItems, 5u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
    }
    // With many threads.
    {
        auto const batchSize = 20u;
        auto const maxThreadCount = 100u;
        auto const [numThreads, numItems] =
            Backend::calculateBatchParallelism(batchSize, maxThreadCount);
        EXPECT_EQ(numThreads, 5u);  // ceil(20/4) = 5, limited by batch size.
        EXPECT_EQ(numItems, 4u);
        auto const ranges = calculateRanges(batchSize, maxThreadCount);
        ASSERT_EQ(ranges.size(), numThreads);
        for (size_t i = 0; i < numThreads; ++i)
        {
            EXPECT_EQ(ranges[i].first, i * numItems);
            EXPECT_EQ(ranges[i].second, (i + 1) * numItems);
        }
    }
 }
--- a/src/tests/libxrpl/nodestore/main.cpp
+++ b/src/tests/libxrpl/nodestore/main.cpp
@@ -0,0 +1,8 @@
 #include <gtest/gtest.h>
 int
 main(int argc, char** argv)
 {
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
 }
Author	SHA1	Message	Date
Bart	b55e344205	Defensive checks	2026-03-21 20:06:55 -04:00
Bart	8a896f318a	Add tests, add gracefull stopping	2026-03-21 18:57:03 -04:00
Bart	0e26aadfe6	Make pending writes size_t to avoid casting everywhere	2026-03-21 10:24:24 -04:00
Bart	d029bcf2d0	Threadpool join in destructor, exception handling	2026-03-21 10:21:24 -04:00
Bart	403cab41e9	Fix thread parallelization calculation	2026-03-20 18:03:17 -04:00
Bart	59e6fbfe12	Merge branch 'develop' into bthomee/iops	2026-03-20 16:04:00 -04:00
Bart	d63f80f73c	Merge branch 'develop' into bthomee/iops	2026-03-19 17:28:32 -04:00
Bart	dc5eb0ea50	Make class variables private	2026-03-19 08:36:24 -04:00
Bart	609024f15c	Merge branch 'develop' into bthomee/iops	2026-03-19 06:32:59 -04:00
Bart	1bf5b0aa10	Add braces	2026-03-18 14:52:06 -04:00
Bart	f783a15bc8	Review feedback	2026-03-18 14:20:06 -04:00
Bart	5a94948a04	Merge branch 'develop' into bthomee/iops	2026-03-18 14:03:28 -04:00
Bart	f586382622	perf: Improve IOPS when reading from and writing to NuDB and RocksDB	2026-03-17 17:56:59 -04:00