perf: Remove unnecessary caches (#5439)

This change removes the in-memory cache in `DatabaseNodeImp` and simplifies the caching logic in `SHAMapStoreImp`. Since NuDB and RocksDB already maintain their own internal caches, the extra application-level caches add little value, which preliminary performance analysis confirmed.
Author:    Bart
Date:      2026-02-06 09:42:35 -05:00
Committer: GitHub
Parent:    25d7c2c4ec
Commit:    677758b1cc
10 changed files with 47 additions and 211 deletions
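For context on the rationale: both backends cache internally, so a read that misses rippled's in-process cache is often still served from memory by the backend itself. The standalone sketch below is illustrative only (the 512 MiB figure and path are made-up values, and rippled configures RocksDB through its own option parsing); it shows RocksDB's built-in LRU block cache being wired up:

#include <rocksdb/cache.h>
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>
#include <string>

int main()
{
    // RocksDB caches decompressed data blocks internally; reads served from
    // this cache never touch disk, which is what makes a second
    // application-level object cache largely redundant.
    rocksdb::BlockBasedTableOptions table_options;
    table_options.block_cache = rocksdb::NewLRUCache(512 * 1024 * 1024);  // 512 MiB, example value

    rocksdb::Options options;
    options.create_if_missing = true;
    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));

    rocksdb::DB* db = nullptr;
    auto status = rocksdb::DB::Open(options, "/tmp/example-db", &db);
    if (!status.ok())
        return 1;

    db->Put(rocksdb::WriteOptions(), "key", "value");
    std::string value;
    db->Get(rocksdb::ReadOptions(), "key", &value);  // may be served from the block cache
    delete db;
    return 0;
}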

View File

@@ -940,23 +940,7 @@
 #
 #   path            Location to store the database
 #
-#   Optional keys
-#
-#   cache_size      Size of cache for database records. Default is 16384.
-#                   Setting this value to 0 will use the default value.
-#
-#   cache_age       Length of time in minutes to keep database records
-#                   cached. Default is 5 minutes. Setting this value to
-#                   0 will use the default value.
-#
-#   Note: if neither cache_size nor cache_age is
-#   specified, the cache for database records will not
-#   be created. If only one of cache_size or cache_age
-#   is specified, the cache will be created using the
-#   default value for the unspecified parameter.
-#
-#   Note: the cache will not be created if online_delete
-#   is specified.
+#   Optional keys for NuDB and RocksDB:
 #
 #   fast_load       Boolean. If set, load the last persisted ledger
 #                   from disk upon process start before syncing to
@@ -964,8 +948,6 @@
 #                   if sufficient IOPS capacity is available.
 #                   Default 0.
 #
-#   Optional keys for NuDB or RocksDB:
-#
 #   earliest_seq    The default is 32570 to match the XRP ledger
 #                   network's earliest allowed sequence. Alternate
 #                   networks may set this value. Minimum value of 1.
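After this change, a minimal stanza needs only the backend type and path, plus any of the remaining optional keys documented above. A hypothetical example (placeholder path):

[node_db]
type=NuDB
# placeholder path; use your server's database directory
path=/var/lib/rippled/db/nudb
# optional; see fast_load above
fast_load=1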

View File

@@ -133,10 +133,6 @@ public:
         std::uint32_t ledgerSeq,
         std::function<void(std::shared_ptr<NodeObject> const&)>&& callback);
 
-    /** Remove expired entries from the positive and negative caches. */
-    virtual void
-    sweep() = 0;
-
     /** Gather statistics pertaining to read and write activities.
      *
      * @param obj Json object reference into which to place counters.
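For reference, the removed sweep() hook existed to age entries out of these in-process caches. A minimal sketch of what such a sweep does, using a plain std::map rather than rippled's TaggedCache (types and names here are illustrative):

#include <chrono>
#include <map>
#include <utility>

// Walk the cache and drop any entry older than the configured age. A real
// implementation would also bound the cache by size and lock concurrently.
template <typename Key, typename Value>
void sweepExpired(
    std::map<Key, std::pair<Value, std::chrono::steady_clock::time_point>>& cache,
    std::chrono::minutes maxAge)
{
    auto const now = std::chrono::steady_clock::now();
    for (auto it = cache.begin(); it != cache.end();)
    {
        if (now - it->second.second > maxAge)
            it = cache.erase(it);  // expired: remove and advance
        else
            ++it;
    }
}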

View File

@@ -23,32 +23,6 @@ public:
         beast::Journal j)
         : Database(scheduler, readThreads, config, j), backend_(std::move(backend))
     {
-        std::optional<int> cacheSize, cacheAge;
-        if (config.exists("cache_size"))
-        {
-            cacheSize = get<int>(config, "cache_size");
-            if (cacheSize.value() < 0)
-            {
-                Throw<std::runtime_error>("Specified negative value for cache_size");
-            }
-        }
-        if (config.exists("cache_age"))
-        {
-            cacheAge = get<int>(config, "cache_age");
-            if (cacheAge.value() < 0)
-            {
-                Throw<std::runtime_error>("Specified negative value for cache_age");
-            }
-        }
-        if (cacheSize != 0 || cacheAge != 0)
-        {
-            cache_ = std::make_shared<TaggedCache<uint256, NodeObject>>(
-                "DatabaseNodeImp", cacheSize.value_or(0), std::chrono::minutes(cacheAge.value_or(0)), stopwatch(), j);
-        }
-
         XRPL_ASSERT(
             backend_,
             "xrpl::NodeStore::DatabaseNodeImp::DatabaseNodeImp : non-null "
@@ -103,13 +77,7 @@ public:
         std::uint32_t ledgerSeq,
         std::function<void(std::shared_ptr<NodeObject> const&)>&& callback) override;
 
-    void
-    sweep() override;
-
 private:
-    // Cache for database objects. This cache is not always initialized. Check
-    // for null before using.
-    std::shared_ptr<TaggedCache<uint256, NodeObject>> cache_;
-
     // Persistent key/value storage
     std::shared_ptr<Backend> backend_;
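Besides positive entries, the deleted cache_ recorded backend misses as hotDUMMY sentinel objects so repeated lookups for a missing key skipped the backend (the "negative cache" mentioned in the removed sweep() docstring). A rough sketch of that pattern with generic types, not the actual TaggedCache API:

#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>

// Minimal sketch of negative caching: a backend miss is stored as a null
// (sentinel) value so later lookups for the same missing key can be answered
// without touching the backend at all.
template <typename Key, typename Value>
class NegativeCache
{
public:
    // A nullptr value means "known missing" (the hotDUMMY role).
    void put(Key const& key, std::shared_ptr<Value> value)
    {
        std::lock_guard lock(mutex_);
        map_[key] = std::move(value);
    }

    // Outer nullopt: not cached, ask the backend.
    // Inner nullptr: cached miss, skip the backend.
    std::optional<std::shared_ptr<Value>> get(Key const& key)
    {
        std::lock_guard lock(mutex_);
        if (auto it = map_.find(key); it != map_.end())
            return it->second;
        return std::nullopt;
    }

private:
    std::mutex mutex_;
    std::unordered_map<Key, std::shared_ptr<Value>> map_;
};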

View File

@@ -55,9 +55,6 @@ public:
     void
     sync() override;
 
-    void
-    sweep() override;
-
 private:
     std::shared_ptr<Backend> writableBackend_;
     std::shared_ptr<Backend> archiveBackend_;

View File

@@ -10,11 +10,6 @@ DatabaseNodeImp::store(NodeObjectType type, Blob&& data, uint256 const& hash, st
     auto obj = NodeObject::createObject(type, std::move(data), hash);
     backend_->store(obj);
-    if (cache_)
-    {
-        // After the store, replace a negative cache entry if there is one
-        cache_->canonicalize(hash, obj, [](std::shared_ptr<NodeObject> const& n) { return n->getType() == hotDUMMY; });
-    }
 }
 
 void
@@ -23,77 +18,36 @@ DatabaseNodeImp::asyncFetch(
     std::uint32_t ledgerSeq,
     std::function<void(std::shared_ptr<NodeObject> const&)>&& callback)
 {
-    if (cache_)
-    {
-        std::shared_ptr<NodeObject> obj = cache_->fetch(hash);
-        if (obj)
-        {
-            callback(obj->getType() == hotDUMMY ? nullptr : obj);
-            return;
-        }
-    }
     Database::asyncFetch(hash, ledgerSeq, std::move(callback));
 }
-
-void
-DatabaseNodeImp::sweep()
-{
-    if (cache_)
-        cache_->sweep();
-}
 
 std::shared_ptr<NodeObject>
 DatabaseNodeImp::fetchNodeObject(uint256 const& hash, std::uint32_t, FetchReport& fetchReport, bool duplicate)
 {
-    std::shared_ptr<NodeObject> nodeObject = cache_ ? cache_->fetch(hash) : nullptr;
+    std::shared_ptr<NodeObject> nodeObject = nullptr;
+    Status status;
-    if (!nodeObject)
+    try
     {
-        JLOG(j_.trace()) << "fetchNodeObject " << hash << ": record not " << (cache_ ? "cached" : "found");
-        Status status;
-        try
-        {
-            status = backend_->fetch(hash.data(), &nodeObject);
-        }
-        catch (std::exception const& e)
-        {
-            JLOG(j_.fatal()) << "fetchNodeObject " << hash << ": Exception fetching from backend: " << e.what();
-            Rethrow();
-        }
-        switch (status)
-        {
-            case ok:
-                if (cache_)
-                {
-                    if (nodeObject)
-                        cache_->canonicalize_replace_client(hash, nodeObject);
-                    else
-                    {
-                        auto notFound = NodeObject::createObject(hotDUMMY, {}, hash);
-                        cache_->canonicalize_replace_client(hash, notFound);
-                        if (notFound->getType() != hotDUMMY)
-                            nodeObject = notFound;
-                    }
-                }
-                break;
-            case notFound:
-                break;
-            case dataCorrupt:
-                JLOG(j_.fatal()) << "fetchNodeObject " << hash << ": nodestore data is corrupted";
-                break;
-            default:
-                JLOG(j_.warn()) << "fetchNodeObject " << hash << ": backend returns unknown result " << status;
-                break;
-        }
+        status = backend_->fetch(hash.data(), &nodeObject);
     }
-    else
+    catch (std::exception const& e)
     {
-        JLOG(j_.trace()) << "fetchNodeObject " << hash << ": record found in cache";
-        if (nodeObject->getType() == hotDUMMY)
-            nodeObject.reset();
+        JLOG(j_.fatal()) << "fetchNodeObject " << hash << ": Exception fetching from backend: " << e.what();
+        Rethrow();
     }
+    switch (status)
+    {
+        case ok:
+        case notFound:
+            break;
+        case dataCorrupt:
+            JLOG(j_.fatal()) << "fetchNodeObject " << hash << ": nodestore data is corrupted";
+            break;
+        default:
+            JLOG(j_.warn()) << "fetchNodeObject " << hash << ": backend returns unknown result " << status;
+            break;
+    }
 
     if (nodeObject)
@@ -105,66 +59,36 @@ DatabaseNodeImp::fetchNodeObject(uint256 const& hash, std::uint32_t, FetchReport
 std::vector<std::shared_ptr<NodeObject>>
 DatabaseNodeImp::fetchBatch(std::vector<uint256> const& hashes)
 {
-    std::vector<std::shared_ptr<NodeObject>> results{hashes.size()};
     using namespace std::chrono;
     auto const before = steady_clock::now();
-    std::unordered_map<uint256 const*, size_t> indexMap;
-    std::vector<uint256 const*> cacheMisses;
-    uint64_t hits = 0;
-    uint64_t fetches = 0;
+    std::vector<uint256 const*> batch{};
+    batch.reserve(hashes.size());
     for (size_t i = 0; i < hashes.size(); ++i)
     {
         auto const& hash = hashes[i];
-        // See if the object already exists in the cache
-        auto nObj = cache_ ? cache_->fetch(hash) : nullptr;
-        ++fetches;
-        if (!nObj)
-        {
-            // Try the database
-            indexMap[&hash] = i;
-            cacheMisses.push_back(&hash);
-        }
-        else
-        {
-            results[i] = nObj->getType() == hotDUMMY ? nullptr : nObj;
-            // It was in the cache.
-            ++hits;
-        }
+        batch.push_back(&hash);
     }
-    JLOG(j_.debug()) << "fetchBatch - cache hits = " << (hashes.size() - cacheMisses.size())
-                     << " - cache misses = " << cacheMisses.size();
-    auto dbResults = backend_->fetchBatch(cacheMisses).first;
-    for (size_t i = 0; i < dbResults.size(); ++i)
+    // Get the node objects that match the hashes from the backend. To protect
+    // against the backends returning fewer or more results than expected, the
+    // container is resized to the number of hashes.
+    auto results = backend_->fetchBatch(batch).first;
+    XRPL_ASSERT(
+        results.size() == hashes.size() || results.empty(),
+        "number of output objects either matches number of input hashes or is empty");
+    results.resize(hashes.size());
+    for (size_t i = 0; i < results.size(); ++i)
     {
-        auto nObj = std::move(dbResults[i]);
-        size_t index = indexMap[cacheMisses[i]];
-        auto const& hash = hashes[index];
-        if (nObj)
-        {
-            // Ensure all threads get the same object
-            if (cache_)
-                cache_->canonicalize_replace_client(hash, nObj);
-        }
-        else
+        if (!results[i])
         {
             JLOG(j_.error()) << "fetchBatch - "
-                             << "record not found in db or cache. hash = " << strHex(hash);
-            if (cache_)
-            {
-                auto notFound = NodeObject::createObject(hotDUMMY, {}, hash);
-                cache_->canonicalize_replace_client(hash, notFound);
-                if (notFound->getType() != hotDUMMY)
-                    nObj = std::move(notFound);
-            }
+                             << "record not found in db. hash = " << strHex(hashes[i]);
         }
-        results[index] = std::move(nObj);
     }
     auto fetchDurationUs = std::chrono::duration_cast<std::chrono::microseconds>(steady_clock::now() - before).count();
-    updateFetchMetrics(fetches, hits, fetchDurationUs);
+    updateFetchMetrics(hashes.size(), 0, fetchDurationUs);
     return results;
 }
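The rewritten loop depends on a positional contract with the backend: result i corresponds to hash i, and resize() pads any short or empty result set with nullptr so indexing stays safe. A self-contained sketch of that guard, with a stubbed backend standing in for backend_->fetchBatch (names are illustrative):

#include <cassert>
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

// Stand-in for a backend batch fetch that may return nothing on failure.
std::vector<std::shared_ptr<int>> backendFetchBatch(std::size_t /*requested*/)
{
    return {};  // simulate a backend that returned an empty result set
}

int main()
{
    std::size_t const requested = 3;
    auto results = backendFetchBatch(requested);

    // Contract: either one result slot per requested hash, or empty on failure.
    assert(results.size() == requested || results.empty());

    // Force positional alignment; missing entries become nullptr so callers
    // can safely index results[i] for input i.
    results.resize(requested);

    for (std::size_t i = 0; i < results.size(); ++i)
        if (!results[i])
            std::cout << "record not found for input " << i << '\n';
    return 0;
}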

View File

@@ -93,12 +93,6 @@ DatabaseRotatingImp::store(NodeObjectType type, Blob&& data, uint256 const& hash
     storeStats(1, nObj->getData().size());
 }
 
-void
-DatabaseRotatingImp::sweep()
-{
-    // nothing to do
-}
-
 std::shared_ptr<NodeObject>
 DatabaseRotatingImp::fetchNodeObject(uint256 const& hash, std::uint32_t, FetchReport& fetchReport, bool duplicate)
 {

View File

@@ -490,19 +490,8 @@ public:
         Env env(*this, envconfig(onlineDelete));
 
         /////////////////////////////////////////////////////////////
-        // Create the backend. Normally, SHAMapStoreImp handles all these
-        // details
-        auto nscfg = env.app().config().section(ConfigSection::nodeDatabase());
-
-        // Provide default values:
-        if (!nscfg.exists("cache_size"))
-            nscfg.set(
-                "cache_size", std::to_string(env.app().config().getValueFor(SizedItem::treeCacheSize, std::nullopt)));
-        if (!nscfg.exists("cache_age"))
-            nscfg.set(
-                "cache_age", std::to_string(env.app().config().getValueFor(SizedItem::treeCacheAge, std::nullopt)));
+        // Create NodeStore with two backends to allow online deletion of data.
+        // Normally, SHAMapStoreImp handles all these details.
         NodeStoreScheduler scheduler(env.app().getJobQueue());
 
         std::string const writableDb = "write";
@@ -510,9 +499,8 @@ public:
         auto writableBackend = makeBackendRotating(env, scheduler, writableDb);
         auto archiveBackend = makeBackendRotating(env, scheduler, archiveDb);
 
-        // Create NodeStore with two backends to allow online deletion of
-        // data
         constexpr int readThreads = 4;
+        auto nscfg = env.app().config().section(ConfigSection::nodeDatabase());
         auto dbr = std::make_unique<NodeStore::DatabaseRotatingImp>(
             scheduler,
             readThreads,

View File

@@ -908,10 +908,6 @@ public:
         JLOG(m_journal.debug()) << "MasterTransaction sweep. Size before: " << oldMasterTxSize
                                 << "; size after: " << masterTxCache.size();
     }
-    {
-        // Does not appear to have an associated cache.
-        getNodeStore().sweep();
-    }
     {
         std::size_t const oldLedgerMasterCacheSize = getLedgerMaster().getFetchPackCacheSize();

View File

@@ -130,14 +130,6 @@ std::unique_ptr<NodeStore::Database>
 SHAMapStoreImp::makeNodeStore(int readThreads)
 {
     auto nscfg = app_.config().section(ConfigSection::nodeDatabase());
-
-    // Provide default values:
-    if (!nscfg.exists("cache_size"))
-        nscfg.set("cache_size", std::to_string(app_.config().getValueFor(SizedItem::treeCacheSize, std::nullopt)));
-    if (!nscfg.exists("cache_age"))
-        nscfg.set("cache_age", std::to_string(app_.config().getValueFor(SizedItem::treeCacheAge, std::nullopt)));
-
     std::unique_ptr<NodeStore::Database> db;
     if (deleteInterval_)
@@ -226,8 +218,6 @@ SHAMapStoreImp::run()
     LedgerIndex lastRotated = state_db_.getState().lastRotated;
     netOPs_ = &app_.getOPs();
     ledgerMaster_ = &app_.getLedgerMaster();
-    fullBelowCache_ = &(*app_.getNodeFamily().getFullBelowCache());
-    treeNodeCache_ = &(*app_.getNodeFamily().getTreeNodeCache());
 
     if (advisoryDelete_)
         canDelete_ = state_db_.getCanDelete();
@@ -490,16 +480,19 @@ void
 SHAMapStoreImp::clearCaches(LedgerIndex validatedSeq)
 {
     ledgerMaster_->clearLedgerCachePrior(validatedSeq);
-    fullBelowCache_->clear();
+
+    // Also clear the FullBelowCache so its generation counter is bumped.
+    // This prevents stale "full below" markers from persisting across
+    // backend rotation/online deletion and interfering with SHAMap sync.
+    app_.getNodeFamily().getFullBelowCache()->clear();
 }
 
 void
 SHAMapStoreImp::freshenCaches()
 {
-    if (freshenCache(*treeNodeCache_))
-        return;
-    if (freshenCache(app_.getMasterTransaction().getCache()))
+    if (freshenCache(*app_.getNodeFamily().getTreeNodeCache()))
         return;
+    freshenCache(app_.getMasterTransaction().getCache());
 }
 
 void
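The new comment refers to the generation counter inside FullBelowCache: clearing bumps the generation, so markers stamped under an older generation simply stop matching rather than being erased one by one. A simplified illustration of that invalidation scheme (not rippled's actual class):

#include <atomic>
#include <cstdint>

// Sketch of generation-based invalidation: a "full below" marker is stamped
// with the generation current at the time it was set. clear() bumps the
// generation, instantly invalidating every previously stamped marker.
class GenerationalMarker
{
public:
    std::uint32_t generation() const { return gen_.load(); }

    // A marker is trusted only if it carries the current generation.
    bool isCurrent(std::uint32_t stamped) const { return stamped == gen_.load(); }

    void clear() { ++gen_; }  // O(1) invalidation of all outstanding markers

private:
    std::atomic<std::uint32_t> gen_{1};
};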

View File

@@ -93,8 +93,6 @@ private:
     // as of run() or before
     NetworkOPs* netOPs_ = nullptr;
     LedgerMaster* ledgerMaster_ = nullptr;
-    FullBelowCache* fullBelowCache_ = nullptr;
-    TreeNodeCache* treeNodeCache_ = nullptr;
 
     static constexpr auto nodeStoreName_ = "NodeStore";