Rework deferred node logic and async fetch behavior

This comment explains this patch and the associated patches
that should be folded into it. This paragraph should be removed
when the patches are folded after review.

This change significantly improves ledger sync and fetch
times while reducing memory consumption. The affected code
begins with SHAMap::getMissingNodes and runs through to
Database::threadEntry.

The existing code issues a number of async fetches which are then
handed off to the Database's pool of read threads to execute.
The results of each read are placed in the Database's positive
and negative caches. The caller waits for all reads to complete
and then retrieves the results out of these caches.

Among other issues, this means that the results of the first read
cannot be processed until the last read completes. Additionally,
all the results must sit in memory.
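
To make the problem concrete, here is a minimal, self-contained
sketch of the old pattern. The names (positiveCache, keys, and so
on) are hypothetical stand-ins for the Database read pool and its
caches, not rippled's actual API:

#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

// Old pattern (sketch): pool threads deposit results into a shared
// cache; the caller blocks until every read is done, then pulls all
// results back out. A negative cache (not shown) recorded misses.
int
main()
{
    std::vector<std::string> keys{"a", "b", "c"};

    std::mutex m;
    std::map<std::string, std::string> positiveCache;

    // One thread per read, standing in for the read-thread pool.
    std::vector<std::thread> pool;
    for (auto const& key : keys)
    {
        pool.emplace_back([&m, &positiveCache, key] {
            std::string value = "node-for-" + key;  // simulated backend fetch
            std::lock_guard<std::mutex> lock(m);
            positiveCache[key] = value;  // parked until the caller returns
        });
    }

    // The first result cannot be processed until the last read
    // finishes, and every result sits in memory until then.
    for (auto& t : pool)
        t.join();

    for (auto const& key : keys)
        std::cout << key << " -> " << positiveCache[key] << '\n';
}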

This patch changes the behavior so that each read operation has a
completion handler associated with it. When a read completes, it
invokes its handler, so the result of each read can be processed
as soon as it is available. Since staging results for the caller
was the only reason the negative and positive caches were needed,
they can now be removed.
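
By contrast, a sketch of the completion-handler pattern (again with
hypothetical names such as Read and runReads; not the actual
Database interface):

#include <functional>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

// New pattern (sketch): each queued read carries its own completion
// handler, invoked on the read thread the moment that read finishes.
// No positive or negative cache is needed to stage results.
using Handler =
    std::function<void(std::string const& key, std::string const& value)>;

struct Read
{
    std::string key;
    Handler onComplete;
};

void
runReads(std::vector<Read> reads)
{
    // One thread per read, standing in for the read-thread pool.
    std::vector<std::thread> pool;
    for (auto& read : reads)
    {
        pool.emplace_back([read = std::move(read)] {
            std::string value = "node-for-" + read.key;  // simulated fetch
            read.onComplete(read.key, value);  // processed immediately
        });
    }
    for (auto& t : pool)
        t.join();
}

int
main()
{
    auto print = [](std::string const& k, std::string const& v) {
        std::cout << k << " -> " << v << '\n';
    };
    runReads({{"a", print}, {"b", print}});
}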

The read generation code is also no longer needed and is removed.
The batch fetch logic was never implemented or supported and is
removed.
Author: JoelKatz
Date: 2020-11-16 22:38:31 -08:00
Committed by: Nik Bougalis
Parent: 7b192945eb
Commit: 02ccdeb94e
28 changed files with 275 additions and 750 deletions

src/ripple/nodestore/impl/Shard.cpp

@@ -176,8 +176,6 @@ Shard::tryClose()
     acquireInfo_.reset();
 
     // Reset caches to reduce memory use
-    pCache_->reset();
-    nCache_->reset();
     app_.getShardFamily()->getFullBelowCache(lastSeq_)->reset();
     app_.getShardFamily()->getTreeNodeCache(lastSeq_)->reset();
@@ -225,8 +223,6 @@ Shard::storeNodeObject(std::shared_ptr<NodeObject> const& nodeObject)
     if (!scopedCount)
         return false;
 
-    pCache_->canonicalize_replace_cache(nodeObject->getHash(), nodeObject);
-
     try
     {
         backend_->store(nodeObject);
@@ -239,7 +235,6 @@ Shard::storeNodeObject(std::shared_ptr<NodeObject> const& nodeObject)
         return false;
     }
 
-    nCache_->erase(nodeObject->getHash());
     return true;
 }
@@ -250,82 +245,47 @@ Shard::fetchNodeObject(uint256 const& hash, FetchReport& fetchReport)
     if (!scopedCount)
         return nullptr;
 
-    // See if the node object exists in the cache
-    auto nodeObject{pCache_->fetch(hash)};
-    if (!nodeObject && !nCache_->touch_if_exists(hash))
+    std::shared_ptr<NodeObject> nodeObject;
+
+    // Try the backend
+    Status status;
+    try
     {
-        // Try the backend
-        fetchReport.wentToDisk = true;
+        status = backend_->fetch(hash.data(), &nodeObject);
+    }
+    catch (std::exception const& e)
+    {
+        JLOG(j_.fatal()) << "shard " << index_
+                         << ". Exception caught in function " << __func__
+                         << ". Error: " << e.what();
+        return nullptr;
+    }
 
-        Status status;
-        try
-        {
-            status = backend_->fetch(hash.data(), &nodeObject);
-        }
-        catch (std::exception const& e)
-        {
+    switch (status)
+    {
+        case ok:
+        case notFound:
+            break;
+        case dataCorrupt: {
             JLOG(j_.fatal())
-                << "shard " << index_ << ". Exception caught in function "
-                << __func__ << ". Error: " << e.what();
-            return nullptr;
+                << "shard " << index_ << ". Corrupt node object at hash "
+                << to_string(hash);
+            break;
         }
-
-        switch (status)
-        {
-            case ok:
-            case notFound:
-                break;
-            case dataCorrupt: {
-                JLOG(j_.fatal())
-                    << "shard " << index_ << ". Corrupt node object at hash "
-                    << to_string(hash);
-                break;
-            }
-            default: {
-                JLOG(j_.warn())
-                    << "shard " << index_ << ". Unknown status=" << status
-                    << " fetching node object at hash " << to_string(hash);
-                break;
-            }
-        }
-
-        if (!nodeObject)
-        {
-            // Just in case a write occurred
-            nodeObject = pCache_->fetch(hash);
-            if (!nodeObject)
-                // We give up
-                nCache_->insert(hash);
-        }
-        else
-        {
-            // Ensure all threads get the same object
-            pCache_->canonicalize_replace_client(hash, nodeObject);
-            fetchReport.wasFound = true;
-
-            // Since this was a 'hard' fetch, we will log it
-            JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db";
+        default: {
+            JLOG(j_.warn())
+                << "shard " << index_ << ". Unknown status=" << status
+                << " fetching node object at hash " << to_string(hash);
+            break;
         }
     }
 
+    if (nodeObject)
+        fetchReport.wasFound = true;
+
     return nodeObject;
 }
 
-bool
-Shard::fetchNodeObjectFromCache(
-    uint256 const& hash,
-    std::shared_ptr<NodeObject>& nodeObject)
-{
-    auto const scopedCount{makeBackendCount()};
-    if (!scopedCount)
-        return false;
-
-    nodeObject = pCache_->fetch(hash);
-    if (nodeObject || nCache_->touch_if_exists(hash))
-        return true;
-
-    return false;
-}
-
 Shard::StoreLedgerResult
 Shard::storeLedger(
     std::shared_ptr<Ledger const> const& srcLedger,
@@ -369,12 +329,7 @@ Shard::storeLedger(
     auto storeBatch = [&]() {
         std::uint64_t sz{0};
         for (auto const& nodeObject : batch)
-        {
-            pCache_->canonicalize_replace_cache(
-                nodeObject->getHash(), nodeObject);
-            nCache_->erase(nodeObject->getHash());
             sz += nodeObject->getData().size();
-        }
 
         try
         {
@@ -530,38 +485,7 @@ Shard::containsLedger(std::uint32_t ledgerSeq) const
 void
 Shard::sweep()
 {
-    boost::optional<Shard::Count> scopedCount;
-    {
-        std::lock_guard lock(mutex_);
-        if (!backend_ || !backend_->isOpen())
-        {
-            JLOG(j_.error()) << "shard " << index_ << " not initialized";
-            return;
-        }
-
-        scopedCount.emplace(&backendCount_);
-    }
-
-    pCache_->sweep();
-    nCache_->sweep();
-}
-
-int
-Shard::getDesiredAsyncReadCount()
-{
-    auto const scopedCount{makeBackendCount()};
-    if (!scopedCount)
-        return 0;
-    return pCache_->getTargetSize() / asyncDivider;
-}
-
-float
-Shard::getCacheHitRate()
-{
-    auto const scopedCount{makeBackendCount()};
-    if (!scopedCount)
-        return 0;
-    return pCache_->getHitRate();
+    // nothing to do
 }
 
 std::chrono::steady_clock::time_point
@@ -712,8 +636,6 @@ Shard::finalize(
     auto const treeNodeCache{shardFamily.getTreeNodeCache(lastSeq_)};
 
     // Reset caches to reduce memory usage
-    pCache_->reset();
-    nCache_->reset();
     fullBelowCache->reset();
     treeNodeCache->reset();
@@ -767,8 +689,6 @@ Shard::finalize(
         next = std::move(ledger);
         --ledgerSeq;
 
-        pCache_->reset();
-        nCache_->reset();
         fullBelowCache->reset();
         treeNodeCache->reset();
     }
@@ -859,9 +779,6 @@ Shard::open(std::lock_guard<std::mutex> const& lock)
     txSQLiteDB_.reset();
     acquireInfo_.reset();
 
-    pCache_.reset();
-    nCache_.reset();
-
     state_ = acquire;
 
     if (!preexist)
@@ -982,14 +899,6 @@ Shard::open(std::lock_guard<std::mutex> const& lock)
             ". Error: " + e.what());
     }
 
-    // Set backend caches
-    auto const size{config.getValueFor(SizedItem::nodeCacheSize, 0)};
-    auto const age{
-        std::chrono::seconds{config.getValueFor(SizedItem::nodeCacheAge, 0)}};
-    auto const name{"shard " + std::to_string(index_)};
-    pCache_ = std::make_unique<PCache>(name, size, age, stopwatch(), j_);
-    nCache_ = std::make_unique<NCache>(name, stopwatch(), size, age);
-
     if (!initSQLite(lock))
         return fail({});