Rework deferred node logic and async fetch behavior

This commit message describes this patch and the associated patches
that should be folded into it. This paragraph should be removed
when those patches are folded in after review.

This change significantly improves ledger sync and fetch
times while reducing memory consumption. It affects the code
path that begins with SHAMap::getMissingNodes and runs through
to Database::threadEntry.

The existing code issues a number of async fetches, which are
handed off to the Database's pool of read threads to execute.
The result of each read is placed in the Database's positive or
negative cache. The caller waits for all reads to complete and
then retrieves the results from these caches.
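
Illustratively, that flow reduces to the following self-contained toy
model (ToyDb, postRead, waitReads, and fromCache are stand-in names
for illustration only, not the rippled API):

#include <cstdint>
#include <future>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <vector>

struct NodeObject {};  // stand-in for the real payload

class ToyDb
{
    std::mutex m_;
    std::map<std::uint64_t, std::shared_ptr<NodeObject>> cache_;  // "positive cache"
    std::vector<std::future<void>> pending_;

public:
    void
    postRead(std::uint64_t key)
    {
        // A pool read thread eventually deposits the result in the cache.
        pending_.push_back(std::async(std::launch::async, [this, key] {
            auto obj = std::make_shared<NodeObject>();  // pretend disk read
            std::lock_guard lock{m_};
            cache_[key] = obj;
        }));
    }

    void
    waitReads()  // the caller blocks until every queued read is done
    {
        for (auto& f : pending_)
            f.get();
        pending_.clear();
    }

    std::shared_ptr<NodeObject>
    fromCache(std::uint64_t key)
    {
        std::lock_guard lock{m_};
        auto it = cache_.find(key);
        return it == cache_.end() ? nullptr : it->second;
    }
};

int
main()
{
    ToyDb db;
    for (std::uint64_t key : {1, 2, 3})
        db.postRead(key);  // nothing can be processed yet

    db.waitReads();  // the first result waits on the last read

    for (std::uint64_t key : {1, 2, 3})
        if (db.fromCache(key))  // every result sat in memory until now
            std::cout << "got " << key << '\n';
}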

Among other issues, this means that the result of the first read
cannot be processed until the last read completes, and all of the
results must be held in memory at the same time.

This patch changes the behavior so that each read operation has a
completion handler associated with it. When a read completes, its
handler is invoked, allowing the result of each read to be processed
as soon as it is available. Since holding results for later retrieval
was the only reason the positive and negative caches were needed,
they can now be removed.
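
The new calling pattern looks roughly like the sketch below (again a
toy model: this asyncFetch only mirrors the shape of the new
Database::asyncFetch, it is not the rippled implementation):

#include <condition_variable>
#include <cstdint>
#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <vector>

struct NodeObject {};  // stand-in for the real payload

using Callback = std::function<void(std::shared_ptr<NodeObject> const&)>;

class ToyDb
{
    std::vector<std::future<void>> pending_;

public:
    void
    asyncFetch(std::uint64_t key, Callback&& cb)
    {
        // A pool read thread invokes the handler when the read completes.
        pending_.push_back(
            std::async(std::launch::async, [key, cb = std::move(cb)] {
                auto obj = std::make_shared<NodeObject>();  // pretend disk read
                cb(obj);  // result handed straight to the caller's handler
            }));
    }

    ~ToyDb()
    {
        for (auto& f : pending_)
            f.get();
    }
};

int
main()
{
    std::mutex m;
    std::condition_variable cv;
    int outstanding = 3;
    ToyDb db;

    for (std::uint64_t key : {1, 2, 3})
        db.asyncFetch(key, [&, key](std::shared_ptr<NodeObject> const&) {
            std::lock_guard lock{m};
            // Each result is processed as it completes; no shared
            // positive/negative cache is needed to hold it.
            std::cout << "processed " << key << '\n';
            if (--outstanding == 0)
                cv.notify_one();
        });

    std::unique_lock lock{m};
    cv.wait(lock, [&] { return outstanding == 0; });
}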

The read generation code is also no longer needed and is removed.
The batch fetch logic was never implemented or supported and is
removed.
JoelKatz
2020-11-16 22:38:31 -08:00
committed by Nik Bougalis
parent 7b192945eb
commit 02ccdeb94e
28 changed files with 275 additions and 750 deletions

View File

@@ -964,9 +964,6 @@ public:
     // tune caches
     using namespace std::chrono;
-    m_nodeStore->tune(
-        config_->getValueFor(SizedItem::nodeCacheSize),
-        seconds{config_->getValueFor(SizedItem::nodeCacheAge)});
     m_ledgerMaster->tune(
         config_->getValueFor(SizedItem::ledgerSize),

View File

@@ -78,12 +78,11 @@ NodeStoreScheduler::doTask(NodeStore::Task& task)
 void
 NodeStoreScheduler::onFetch(NodeStore::FetchReport const& report)
 {
-    if (report.wentToDisk)
-        m_jobQueue->addLoadEvents(
-            report.fetchType == NodeStore::FetchType::async ? jtNS_ASYNC_READ
-                                                            : jtNS_SYNC_READ,
-            1,
-            report.elapsed);
+    m_jobQueue->addLoadEvents(
+        report.fetchType == NodeStore::FetchType::async ? jtNS_ASYNC_READ
+                                                        : jtNS_SYNC_READ,
+        1,
+        report.elapsed);
 }
 
 void

View File

@@ -639,8 +639,6 @@ SHAMapStoreImp::clearCaches(LedgerIndex validatedSeq)
 void
 SHAMapStoreImp::freshenCaches()
 {
-    if (freshenCache(dbRotating_->getPositiveCache()))
-        return;
     if (freshenCache(*treeNodeCache_))
         return;
     if (freshenCache(app_.getMasterTransaction().getCache()))

View File

@@ -52,8 +52,6 @@ enum class SizedItem : std::size_t {
     ledgerSize,
     ledgerAge,
     ledgerFetch,
-    nodeCacheSize,
-    nodeCacheAge,
     hashNodeDBCache,
     txnDBCache,
     lgrDBCache,

View File

@@ -41,7 +41,7 @@
 namespace ripple {
 
 // The configurable node sizes are "tiny", "small", "medium", "large", "huge"
-inline constexpr std::array<std::pair<SizedItem, std::array<int, 5>>, 13>
+inline constexpr std::array<std::pair<SizedItem, std::array<int, 5>>, 11>
     sizedItems{{
         // FIXME: We should document each of these items, explaining exactly
         // what
@@ -53,8 +53,6 @@ inline constexpr std::array<std::pair<SizedItem, std::array<int, 5>>, 13>
         {SizedItem::ledgerSize, {{32, 128, 256, 384, 768}}},
         {SizedItem::ledgerAge, {{30, 90, 180, 240, 900}}},
         {SizedItem::ledgerFetch, {{2, 3, 4, 5, 8}}},
-        {SizedItem::nodeCacheSize, {{16384, 32768, 131072, 262144, 524288}}},
-        {SizedItem::nodeCacheAge, {{60, 90, 120, 900, 1800}}},
         {SizedItem::hashNodeDBCache, {{4, 12, 24, 64, 128}}},
         {SizedItem::txnDBCache, {{4, 12, 24, 64, 128}}},
         {SizedItem::lgrDBCache, {{4, 8, 16, 32, 128}}},

View File

@@ -80,14 +80,6 @@ public:
     virtual Status
     fetch(void const* key, std::shared_ptr<NodeObject>* pObject) = 0;
 
-    /** Return `true` if batch fetches are optimized. */
-    virtual bool
-    canFetchBatch() = 0;
-
-    /** Fetch a batch synchronously. */
-    virtual std::vector<std::shared_ptr<NodeObject>>
-    fetchBatch(std::size_t n, void const* const* keys) = 0;
-
     /** Store a single object.
 
         Depending on the implementation this may happen immediately
         or deferred using a scheduled task.

View File

@@ -26,7 +26,6 @@
 #include <ripple/nodestore/Backend.h>
 #include <ripple/nodestore/NodeObject.h>
 #include <ripple/nodestore/Scheduler.h>
-#include <ripple/nodestore/impl/Tuning.h>
 #include <ripple/protocol/SystemParameters.h>
 
 #include <thread>
@@ -114,6 +113,21 @@ public:
         uint256 const& hash,
         std::uint32_t ledgerSeq) = 0;
 
+    /* Check if two ledgers are in the same database
+
+       If these two sequence numbers map to the same database,
+       the result of a fetch with either sequence number would
+       be identical.
+
+       @param s1 The first sequence number
+       @param s2 The second sequence number
+
+       @return 'true' if both ledgers would be in the same DB
+    */
+    virtual bool
+    isSameDB(std::uint32_t s1, std::uint32_t s2) = 0;
+
     /** Fetch a node object.
         If the object is known to be not in the database, isn't found in the
         database during the fetch, or failed to load correctly during the fetch,
@@ -135,20 +149,19 @@ public:
         If I/O is required to determine whether or not the object is present,
         `false` is returned. Otherwise, `true` is returned and `object` is set
         to refer to the object, or `nullptr` if the object is not present.
-        If I/O is required, the I/O is scheduled.
+        If I/O is required, the I/O is scheduled and `true` is returned
 
         @note This can be called concurrently.
         @param hash The key of the object to retrieve
         @param ledgerSeq The sequence of the ledger where the
                          object is stored, used by the shard store.
-        @param nodeObject The object retrieved
-        @return Whether the operation completed
+        @param callback Callback function when read completes
     */
-    virtual bool
+    void
     asyncFetch(
         uint256 const& hash,
         std::uint32_t ledgerSeq,
-        std::shared_ptr<NodeObject>& nodeObject) = 0;
+        std::function<void(std::shared_ptr<NodeObject> const&)>&& callback);
 
     /** Store a ledger from a different database.
/** Store a ledger from a different database. /** Store a ledger from a different database.
@@ -158,32 +171,6 @@ public:
     virtual bool
     storeLedger(std::shared_ptr<Ledger const> const& srcLedger) = 0;
 
-    /** Wait for all currently pending async reads to complete.
-    */
-    void
-    waitReads();
-
-    /** Get the maximum number of async reads the node store prefers.
-
-        @param ledgerSeq A ledger sequence specifying a shard to query.
-        @return The number of async reads preferred.
-        @note The sequence is only used with the shard store.
-    */
-    virtual int
-    getDesiredAsyncReadCount(std::uint32_t ledgerSeq) = 0;
-
-    /** Get the positive cache hits to total attempts ratio. */
-    virtual float
-    getCacheHitRate() = 0;
-
-    /** Set the maximum number of entries and maximum cache age for both caches.
-
-        @param size Number of cache entries (0 = ignore)
-        @param age Maximum cache age in seconds
-    */
-    virtual void
-    tune(int size, std::chrono::seconds age) = 0;
-
     /** Remove expired entries from the positive and negative caches. */
     virtual void
     sweep() = 0;
@@ -272,11 +259,7 @@ protected:
     // Called by the public storeLedger function
     bool
-    storeLedger(
-        Ledger const& srcLedger,
-        std::shared_ptr<Backend> dstBackend,
-        std::shared_ptr<TaggedCache<uint256, NodeObject>> dstPCache,
-        std::shared_ptr<KeyCache<uint256>> dstNCache);
+    storeLedger(Ledger const& srcLedger, std::shared_ptr<Backend> dstBackend);
 
 private:
     std::atomic<std::uint64_t> storeCount_{0};
@@ -285,10 +268,14 @@ private:
     std::mutex readLock_;
     std::condition_variable readCondVar_;
-    std::condition_variable readGenCondVar_;
 
     // reads to do
-    std::map<uint256, std::uint32_t> read_;
+    std::map<
+        uint256,
+        std::vector<std::pair<
+            std::uint32_t,
+            std::function<void(std::shared_ptr<NodeObject> const&)>>>>
+        read_;
 
     // last read
     uint256 readLastHash_;
@@ -296,9 +283,6 @@ private:
     std::vector<std::thread> readThreads_;
     bool readShut_{false};
 
-    // current read generation
-    uint64_t readGen_{0};
-
     // The default is 32570 to match the XRP ledger network's earliest
     // allowed sequence. Alternate networks may set this value.
     std::uint32_t const earliestLedgerSeq_;

View File

@@ -44,9 +44,6 @@ public:
     {
     }
 
-    virtual TaggedCache<uint256, NodeObject> const&
-    getPositiveCache() = 0;
-
     /** Rotates the backends.
 
         @param f A function executed before the rotation and under the same lock

View File

@@ -37,7 +37,6 @@ struct FetchReport
     std::chrono::milliseconds elapsed;
     FetchType const fetchType;
-    bool wentToDisk = false;
     bool wasFound = false;
 };

View File

@@ -147,19 +147,6 @@ public:
         return ok;
     }
 
-    bool
-    canFetchBatch() override
-    {
-        return false;
-    }
-
-    std::vector<std::shared_ptr<NodeObject>>
-    fetchBatch(std::size_t n, void const* const* keys) override
-    {
-        Throw<std::runtime_error>("pure virtual called");
-        return {};
-    }
-
     void
     store(std::shared_ptr<NodeObject> const& object) override
     {

View File

@@ -188,19 +188,6 @@ public:
         return status;
     }
 
-    bool
-    canFetchBatch() override
-    {
-        return false;
-    }
-
-    std::vector<std::shared_ptr<NodeObject>>
-    fetchBatch(std::size_t n, void const* const* keys) override
-    {
-        Throw<std::runtime_error>("pure virtual called");
-        return {};
-    }
-
     void
     do_insert(std::shared_ptr<NodeObject> const& no)
     {

View File

@@ -60,19 +60,6 @@ public:
         return notFound;
     }
 
-    bool
-    canFetchBatch() override
-    {
-        return false;
-    }
-
-    std::vector<std::shared_ptr<NodeObject>>
-    fetchBatch(std::size_t n, void const* const* keys) override
-    {
-        Throw<std::runtime_error>("pure virtual called");
-        return {};
-    }
-
     void
     store(std::shared_ptr<NodeObject> const& object) override
     {

View File

@@ -305,19 +305,6 @@ public:
         return status;
     }
 
-    bool
-    canFetchBatch() override
-    {
-        return false;
-    }
-
-    std::vector<std::shared_ptr<NodeObject>>
-    fetchBatch(std::size_t n, void const* const* keys) override
-    {
-        Throw<std::runtime_error>("pure virtual called");
-        return {};
-    }
-
     void
     store(std::shared_ptr<NodeObject> const& object) override
     {

View File

@@ -57,24 +57,6 @@ Database::~Database()
     stopReadThreads();
 }
 
-void
-Database::waitReads()
-{
-    std::unique_lock<std::mutex> lock(readLock_);
-
-    // Wake in two generations.
-    // Each generation is a full pass over the space.
-    // If we're in generation N and you issue a request,
-    // that request will only be done during generation N
-    // if it happens to land after where the pass currently is.
-    // But, if not, it will definitely be done during generation
-    // N+1 since the request was in the table before that pass
-    // even started. So when you reach generation N+2,
-    // you know the request is done.
-    std::uint64_t const wakeGen = readGen_ + 2;
-    while (!readShut_ && !read_.empty() && (readGen_ < wakeGen))
-        readGenCondVar_.wait(lock);
-}
-
 void
 Database::onStop()
 {
@@ -99,7 +81,6 @@ Database::stopReadThreads()
         readShut_ = true;
         readCondVar_.notify_all();
-        readGenCondVar_.notify_all();
     }
 
     for (auto& e : readThreads_)
@@ -107,12 +88,15 @@ Database::stopReadThreads()
 }
 
 void
-Database::asyncFetch(uint256 const& hash, std::uint32_t ledgerSeq)
+Database::asyncFetch(
+    uint256 const& hash,
+    std::uint32_t ledgerSeq,
+    std::function<void(std::shared_ptr<NodeObject> const&)>&& cb)
 {
     // Post a read
     std::lock_guard lock(readLock_);
-    if (read_.emplace(hash, ledgerSeq).second)
-        readCondVar_.notify_one();
+    read_[hash].emplace_back(ledgerSeq, std::move(cb));
+    readCondVar_.notify_one();
 }
 
 void
@@ -171,8 +155,7 @@ Database::fetchNodeObject(
             ++fetchHitCount_;
             fetchSz_ += nodeObject->getData().size();
         }
-        if (fetchReport.wentToDisk)
-            ++fetchTotalCount_;
+        ++fetchTotalCount_;
 
     fetchReport.elapsed =
         duration_cast<milliseconds>(steady_clock::now() - begin);
@@ -183,9 +166,7 @@ Database::fetchNodeObject(
 bool
 Database::storeLedger(
     Ledger const& srcLedger,
-    std::shared_ptr<Backend> dstBackend,
-    std::shared_ptr<TaggedCache<uint256, NodeObject>> dstPCache,
-    std::shared_ptr<KeyCache<uint256>> dstNCache)
+    std::shared_ptr<Backend> dstBackend)
 {
     auto fail = [&](std::string const& msg) {
         JLOG(j_.error()) << "Source ledger sequence " << srcLedger.info().seq
@@ -193,8 +174,6 @@ Database::storeLedger(
         return false;
     };
 
-    if (!dstPCache || !dstNCache)
-        return fail("Invalid destination cache");
     if (srcLedger.info().hash.isZero())
         return fail("Invalid hash");
     if (srcLedger.info().accountHash.isZero())
@@ -209,12 +188,7 @@ Database::storeLedger(
     auto storeBatch = [&]() {
         std::uint64_t sz{0};
         for (auto const& nodeObject : batch)
-        {
-            dstPCache->canonicalize_replace_cache(
-                nodeObject->getHash(), nodeObject);
-            dstNCache->erase(nodeObject->getHash());
             sz += nodeObject->getData().size();
-        }
 
         try
         {
@@ -296,13 +270,16 @@ Database::threadEntry()
     while (true)
     {
         uint256 lastHash;
-        std::uint32_t lastSeq;
+        std::vector<std::pair<
+            std::uint32_t,
+            std::function<void(std::shared_ptr<NodeObject> const&)>>>
+            entry;
+
         {
             std::unique_lock<std::mutex> lock(readLock_);
             while (!readShut_ && read_.empty())
             {
                 // All work is done
-                readGenCondVar_.notify_all();
                 readCondVar_.wait(lock);
             }
             if (readShut_)
@@ -312,19 +289,26 @@ Database::threadEntry()
             auto it = read_.lower_bound(readLastHash_);
             if (it == read_.end())
             {
+                // start over from the beginning
                 it = read_.begin();
-                // A generation has completed
-                ++readGen_;
-                readGenCondVar_.notify_all();
             }
             lastHash = it->first;
-            lastSeq = it->second;
+            entry = std::move(it->second);
             read_.erase(it);
             readLastHash_ = lastHash;
         }
 
-        // Perform the read
-        fetchNodeObject(lastHash, lastSeq, FetchType::async);
+        auto seq = entry[0].first;
+        auto obj = fetchNodeObject(lastHash, seq, FetchType::async);
+        for (auto const& req : entry)
+        {
+            if ((seq == req.first) || isSameDB(req.first, seq))
+                req.second(obj);
+            else
+                req.second(
+                    fetchNodeObject(lastHash, req.first, FetchType::async));
+        }
     }
 }

View File

@@ -32,42 +32,14 @@ DatabaseNodeImp::store(
     std::uint32_t)
 {
     auto nObj = NodeObject::createObject(type, std::move(data), hash);
-    pCache_->canonicalize_replace_cache(hash, nObj);
     backend_->store(nObj);
-    nCache_->erase(hash);
     storeStats(1, nObj->getData().size());
 }
 
-bool
-DatabaseNodeImp::asyncFetch(
-    uint256 const& hash,
-    std::uint32_t ledgerSeq,
-    std::shared_ptr<NodeObject>& nodeObject)
-{
-    // See if the object is in cache
-    nodeObject = pCache_->fetch(hash);
-    if (nodeObject || nCache_->touch_if_exists(hash))
-        return true;
-    // Otherwise post a read
-    Database::asyncFetch(hash, ledgerSeq);
-    return false;
-}
-
-void
-DatabaseNodeImp::tune(int size, std::chrono::seconds age)
-{
-    pCache_->setTargetSize(size);
-    pCache_->setTargetAge(age);
-    nCache_->setTargetSize(size);
-    nCache_->setTargetAge(age);
-}
-
 void
 DatabaseNodeImp::sweep()
 {
-    pCache_->sweep();
-    nCache_->sweep();
+    // nothing to do
 }
 
 std::shared_ptr<NodeObject>
@@ -76,60 +48,38 @@ DatabaseNodeImp::fetchNodeObject(
     std::uint32_t,
     FetchReport& fetchReport)
 {
-    // See if the node object exists in the cache
-    auto nodeObject{pCache_->fetch(hash)};
-    if (!nodeObject && !nCache_->touch_if_exists(hash))
-    {
-        // Try the backend
-        fetchReport.wentToDisk = true;
-
-        Status status;
-        try
-        {
-            status = backend_->fetch(hash.data(), &nodeObject);
-        }
-        catch (std::exception const& e)
-        {
-            JLOG(j_.fatal()) << "Exception, " << e.what();
-            Rethrow();
-        }
-
-        switch (status)
-        {
-            case ok:
-                ++fetchHitCount_;
-                if (nodeObject)
-                    fetchSz_ += nodeObject->getData().size();
-                break;
-            case notFound:
-                break;
-            case dataCorrupt:
-                JLOG(j_.fatal()) << "Corrupt NodeObject #" << hash;
-                break;
-            default:
-                JLOG(j_.warn()) << "Unknown status=" << status;
-                break;
-        }
-
-        if (!nodeObject)
-        {
-            // Just in case a write occurred
-            nodeObject = pCache_->fetch(hash);
-            if (!nodeObject)
-                // We give up
-                nCache_->insert(hash);
-        }
-        else
-        {
-            fetchReport.wasFound = true;
-
-            // Ensure all threads get the same object
-            pCache_->canonicalize_replace_client(hash, nodeObject);
-
-            // Since this was a 'hard' fetch, we will log it
-            JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db";
-        }
-    }
+    std::shared_ptr<NodeObject> nodeObject;
+    Status status;
+
+    try
+    {
+        status = backend_->fetch(hash.data(), &nodeObject);
+    }
+    catch (std::exception const& e)
+    {
+        JLOG(j_.fatal()) << "Exception, " << e.what();
+        Rethrow();
+    }
+
+    switch (status)
+    {
+        case ok:
+            ++fetchHitCount_;
+            if (nodeObject)
+                fetchSz_ += nodeObject->getData().size();
+            break;
+        case notFound:
+            break;
+        case dataCorrupt:
+            JLOG(j_.fatal()) << "Corrupt NodeObject #" << hash;
+            break;
+        default:
+            JLOG(j_.warn()) << "Unknown status=" << status;
+            break;
+    }
+
+    if (nodeObject)
+        fetchReport.wasFound = true;
 
     return nodeObject;
 }

View File

@@ -43,17 +43,6 @@ public:
         Section const& config,
         beast::Journal j)
         : Database(name, parent, scheduler, readThreads, config, j)
-        , pCache_(std::make_shared<TaggedCache<uint256, NodeObject>>(
-              name,
-              cacheTargetSize,
-              cacheTargetAge,
-              stopwatch(),
-              j))
-        , nCache_(std::make_shared<KeyCache<uint256>>(
-              name,
-              stopwatch(),
-              cacheTargetSize,
-              cacheTargetAge))
         , backend_(std::move(backend))
     {
         assert(backend_);
@@ -88,45 +77,22 @@ public:
     store(NodeObjectType type, Blob&& data, uint256 const& hash, std::uint32_t)
         override;
 
-    bool
-    asyncFetch(
-        uint256 const& hash,
-        std::uint32_t ledgerSeq,
-        std::shared_ptr<NodeObject>& nodeObject) override;
+    bool isSameDB(std::uint32_t, std::uint32_t) override
+    {
+        // only one database
+        return true;
+    }
 
     bool
     storeLedger(std::shared_ptr<Ledger const> const& srcLedger) override
     {
-        return Database::storeLedger(*srcLedger, backend_, pCache_, nCache_);
+        return Database::storeLedger(*srcLedger, backend_);
     }
 
-    int getDesiredAsyncReadCount(std::uint32_t) override
-    {
-        // We prefer a client not fill our cache
-        // We don't want to push data out of the cache
-        // before it's retrieved
-        return pCache_->getTargetSize() / asyncDivider;
-    }
-
-    float
-    getCacheHitRate() override
-    {
-        return pCache_->getHitRate();
-    }
-
-    void
-    tune(int size, std::chrono::seconds age) override;
-
     void
     sweep() override;
 
 private:
-    // Positive cache
-    std::shared_ptr<TaggedCache<uint256, NodeObject>> pCache_;
-
-    // Negative cache
-    std::shared_ptr<KeyCache<uint256>> nCache_;
-
     // Persistent key/value storage
     std::shared_ptr<Backend> backend_;

View File

@@ -34,17 +34,6 @@ DatabaseRotatingImp::DatabaseRotatingImp(
     Section const& config,
     beast::Journal j)
     : DatabaseRotating(name, parent, scheduler, readThreads, config, j)
-    , pCache_(std::make_shared<TaggedCache<uint256, NodeObject>>(
-          name,
-          cacheTargetSize,
-          cacheTargetAge,
-          stopwatch(),
-          j))
-    , nCache_(std::make_shared<KeyCache<uint256>>(
-          name,
-          stopwatch(),
-          cacheTargetSize,
-          cacheTargetAge))
     , writableBackend_(std::move(writableBackend))
     , archiveBackend_(std::move(archiveBackend))
 {
@@ -101,7 +90,7 @@ DatabaseRotatingImp::storeLedger(std::shared_ptr<Ledger const> const& srcLedger)
         return writableBackend_;
     }();
 
-    return Database::storeLedger(*srcLedger, backend, pCache_, nCache_);
+    return Database::storeLedger(*srcLedger, backend);
 }
 
 void
@@ -112,7 +101,6 @@ DatabaseRotatingImp::store(
     std::uint32_t)
 {
     auto nObj = NodeObject::createObject(type, std::move(data), hash);
-    pCache_->canonicalize_replace_cache(hash, nObj);
 
     auto const backend = [&] {
         std::lock_guard lock(mutex_);
@@ -120,40 +108,13 @@ DatabaseRotatingImp::store(
     }();
 
     backend->store(nObj);
-    nCache_->erase(hash);
     storeStats(1, nObj->getData().size());
 }
 
-bool
-DatabaseRotatingImp::asyncFetch(
-    uint256 const& hash,
-    std::uint32_t ledgerSeq,
-    std::shared_ptr<NodeObject>& nodeObject)
-{
-    // See if the object is in cache
-    nodeObject = pCache_->fetch(hash);
-    if (nodeObject || nCache_->touch_if_exists(hash))
-        return true;
-    // Otherwise post a read
-    Database::asyncFetch(hash, ledgerSeq);
-    return false;
-}
-
-void
-DatabaseRotatingImp::tune(int size, std::chrono::seconds age)
-{
-    pCache_->setTargetSize(size);
-    pCache_->setTargetAge(age);
-    nCache_->setTargetSize(size);
-    nCache_->setTargetAge(age);
-}
-
 void
 DatabaseRotatingImp::sweep()
 {
-    pCache_->sweep();
-    nCache_->sweep();
+    // nothing to do
 }
 
 std::shared_ptr<NodeObject>
@@ -196,56 +157,35 @@ DatabaseRotatingImp::fetchNodeObject(
     };
 
     // See if the node object exists in the cache
-    auto nodeObject{pCache_->fetch(hash)};
-    if (!nodeObject && !nCache_->touch_if_exists(hash))
+    std::shared_ptr<NodeObject> nodeObject;
+
+    auto [writable, archive] = [&] {
+        std::lock_guard lock(mutex_);
+        return std::make_pair(writableBackend_, archiveBackend_);
+    }();
+
+    // Try to fetch from the writable backend
+    nodeObject = fetch(writable);
+    if (!nodeObject)
     {
-        auto [writable, archive] = [&] {
-            std::lock_guard lock(mutex_);
-            return std::make_pair(writableBackend_, archiveBackend_);
-        }();
-
-        fetchReport.wentToDisk = true;
-
-        // Try to fetch from the writable backend
-        nodeObject = fetch(writable);
-        if (!nodeObject)
-        {
-            // Otherwise try to fetch from the archive backend
-            nodeObject = fetch(archive);
-            if (nodeObject)
-            {
-                {
-                    // Refresh the writable backend pointer
-                    std::lock_guard lock(mutex_);
-                    writable = writableBackend_;
-                }
-
-                // Update writable backend with data from the archive backend
-                writable->store(nodeObject);
-                nCache_->erase(hash);
-            }
-        }
-
-        if (!nodeObject)
-        {
-            // Just in case a write occurred
-            nodeObject = pCache_->fetch(hash);
-            if (!nodeObject)
-                // We give up
-                nCache_->insert(hash);
-        }
-        else
-        {
-            fetchReport.wasFound = true;
-
-            // Ensure all threads get the same object
-            pCache_->canonicalize_replace_client(hash, nodeObject);
-
-            // Since this was a 'hard' fetch, we will log it
-            JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db";
+        // Otherwise try to fetch from the archive backend
+        nodeObject = fetch(archive);
+        if (nodeObject)
+        {
+            {
+                // Refresh the writable backend pointer
+                std::lock_guard lock(mutex_);
+                writable = writableBackend_;
+            }
+
+            // Update writable backend with data from the archive backend
+            writable->store(nodeObject);
         }
     }
 
+    if (nodeObject)
+        fetchReport.wasFound = true;
+
     return nodeObject;
 }

View File

@@ -63,52 +63,23 @@ public:
     void
     import(Database& source) override;
 
+    bool isSameDB(std::uint32_t, std::uint32_t) override
+    {
+        // rotating store acts as one logical database
+        return true;
+    }
+
     void
     store(NodeObjectType type, Blob&& data, uint256 const& hash, std::uint32_t)
         override;
 
-    bool
-    asyncFetch(
-        uint256 const& hash,
-        std::uint32_t ledgerSeq,
-        std::shared_ptr<NodeObject>& nodeObject) override;
-
     bool
     storeLedger(std::shared_ptr<Ledger const> const& srcLedger) override;
 
-    int getDesiredAsyncReadCount(std::uint32_t) override
-    {
-        // We prefer a client not fill our cache
-        // We don't want to push data out of the cache
-        // before it's retrieved
-        return pCache_->getTargetSize() / asyncDivider;
-    }
-
-    float
-    getCacheHitRate() override
-    {
-        return pCache_->getHitRate();
-    }
-
-    void
-    tune(int size, std::chrono::seconds age) override;
-
     void
     sweep() override;
 
-    TaggedCache<uint256, NodeObject> const&
-    getPositiveCache() override
-    {
-        return *pCache_;
-    }
-
 private:
-    // Positive cache
-    std::shared_ptr<TaggedCache<uint256, NodeObject>> pCache_;
-
-    // Negative cache
-    std::shared_ptr<KeyCache<uint256>> nCache_;
-
     std::shared_ptr<Backend> writableBackend_;
     std::shared_ptr<Backend> archiveBackend_;
 
     mutable std::mutex mutex_;

View File

@@ -1017,31 +1017,6 @@ DatabaseShardImp::store(
     storeStats(1, nodeObject->getData().size());
 }
 
-bool
-DatabaseShardImp::asyncFetch(
-    uint256 const& hash,
-    std::uint32_t ledgerSeq,
-    std::shared_ptr<NodeObject>& nodeObject)
-{
-    std::shared_ptr<Shard> shard;
-    {
-        std::lock_guard lock(mutex_);
-        assert(init_);
-
-        auto const it{shards_.find(acquireIndex_)};
-        if (it == shards_.end())
-            return false;
-        shard = it->second;
-    }
-
-    if (shard->fetchNodeObjectFromCache(hash, nodeObject))
-        return true;
-
-    // Otherwise post a read
-    Database::asyncFetch(hash, ledgerSeq);
-    return false;
-}
-
 bool
 DatabaseShardImp::storeLedger(std::shared_ptr<Ledger const> const& srcLedger)
 {
@@ -1077,41 +1052,6 @@ DatabaseShardImp::storeLedger(std::shared_ptr<Ledger const> const& srcLedger)
     return setStoredInShard(shard, srcLedger);
 }
 
-int
-DatabaseShardImp::getDesiredAsyncReadCount(std::uint32_t ledgerSeq)
-{
-    auto const shardIndex{seqToShardIndex(ledgerSeq)};
-    std::shared_ptr<Shard> shard;
-    {
-        std::lock_guard lock(mutex_);
-        assert(init_);
-
-        auto const it{shards_.find(shardIndex)};
-        if (it == shards_.end())
-            return 0;
-        shard = it->second;
-    }
-
-    return shard->getDesiredAsyncReadCount();
-}
-
-float
-DatabaseShardImp::getCacheHitRate()
-{
-    std::shared_ptr<Shard> shard;
-    {
-        std::lock_guard lock(mutex_);
-        assert(init_);
-
-        auto const it{shards_.find(acquireIndex_)};
-        if (it == shards_.end())
-            return 0;
-        shard = it->second;
-    }
-
-    return shard->getCacheHitRate();
-}
-
 void
 DatabaseShardImp::sweep()
 {

View File

@@ -139,6 +139,12 @@ public:
     std::int32_t
     getWriteLoad() const override;
 
+    bool
+    isSameDB(std::uint32_t s1, std::uint32_t s2) override
+    {
+        return seqToShardIndex(s1) == seqToShardIndex(s2);
+    }
+
     void
     store(
         NodeObjectType type,
@@ -146,24 +152,9 @@ public:
         uint256 const& hash,
         std::uint32_t ledgerSeq) override;
 
-    bool
-    asyncFetch(
-        uint256 const& hash,
-        std::uint32_t ledgerSeq,
-        std::shared_ptr<NodeObject>& nodeObject) override;
-
     bool
     storeLedger(std::shared_ptr<Ledger const> const& srcLedger) override;
 
-    int
-    getDesiredAsyncReadCount(std::uint32_t ledgerSeq) override;
-
-    float
-    getCacheHitRate() override;
-
-    void
-    tune(int size, std::chrono::seconds age) override{};
-
     void
     sweep() override;

View File

@@ -176,8 +176,6 @@ Shard::tryClose()
     acquireInfo_.reset();
 
     // Reset caches to reduce memory use
-    pCache_->reset();
-    nCache_->reset();
     app_.getShardFamily()->getFullBelowCache(lastSeq_)->reset();
     app_.getShardFamily()->getTreeNodeCache(lastSeq_)->reset();
@@ -225,8 +223,6 @@ Shard::storeNodeObject(std::shared_ptr<NodeObject> const& nodeObject)
     if (!scopedCount)
         return false;
 
-    pCache_->canonicalize_replace_cache(nodeObject->getHash(), nodeObject);
-
     try
     {
         backend_->store(nodeObject);
@@ -239,7 +235,6 @@ Shard::storeNodeObject(std::shared_ptr<NodeObject> const& nodeObject)
         return false;
     }
 
-    nCache_->erase(nodeObject->getHash());
     return true;
 }
} }
@@ -250,82 +245,47 @@ Shard::fetchNodeObject(uint256 const& hash, FetchReport& fetchReport)
     if (!scopedCount)
         return nullptr;
 
-    // See if the node object exists in the cache
-    auto nodeObject{pCache_->fetch(hash)};
-    if (!nodeObject && !nCache_->touch_if_exists(hash))
-    {
-        // Try the backend
-        fetchReport.wentToDisk = true;
-
-        Status status;
-        try
-        {
-            status = backend_->fetch(hash.data(), &nodeObject);
-        }
-        catch (std::exception const& e)
-        {
-            JLOG(j_.fatal())
-                << "shard " << index_ << ". Exception caught in function "
-                << __func__ << ". Error: " << e.what();
-            return nullptr;
-        }
-
-        switch (status)
-        {
-            case ok:
-            case notFound:
-                break;
-            case dataCorrupt: {
-                JLOG(j_.fatal())
-                    << "shard " << index_ << ". Corrupt node object at hash "
-                    << to_string(hash);
-                break;
-            }
-            default: {
-                JLOG(j_.warn())
-                    << "shard " << index_ << ". Unknown status=" << status
-                    << " fetching node object at hash " << to_string(hash);
-                break;
-            }
-        }
-
-        if (!nodeObject)
-        {
-            // Just in case a write occurred
-            nodeObject = pCache_->fetch(hash);
-            if (!nodeObject)
-                // We give up
-                nCache_->insert(hash);
-        }
-        else
-        {
-            // Ensure all threads get the same object
-            pCache_->canonicalize_replace_client(hash, nodeObject);
-            fetchReport.wasFound = true;
-
-            // Since this was a 'hard' fetch, we will log it
-            JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db";
-        }
-    }
+    std::shared_ptr<NodeObject> nodeObject;
+
+    // Try the backend
+    Status status;
+    try
+    {
+        status = backend_->fetch(hash.data(), &nodeObject);
+    }
+    catch (std::exception const& e)
+    {
+        JLOG(j_.fatal()) << "shard " << index_
+                         << ". Exception caught in function " << __func__
+                         << ". Error: " << e.what();
+        return nullptr;
+    }
+
+    switch (status)
+    {
+        case ok:
+        case notFound:
+            break;
+        case dataCorrupt: {
+            JLOG(j_.fatal())
+                << "shard " << index_ << ". Corrupt node object at hash "
+                << to_string(hash);
+            break;
+        }
+        default: {
+            JLOG(j_.warn())
+                << "shard " << index_ << ". Unknown status=" << status
+                << " fetching node object at hash " << to_string(hash);
+            break;
+        }
+    }
+
+    if (nodeObject)
+        fetchReport.wasFound = true;
 
     return nodeObject;
 }
 
-bool
-Shard::fetchNodeObjectFromCache(
-    uint256 const& hash,
-    std::shared_ptr<NodeObject>& nodeObject)
-{
-    auto const scopedCount{makeBackendCount()};
-    if (!scopedCount)
-        return false;
-
-    nodeObject = pCache_->fetch(hash);
-    if (nodeObject || nCache_->touch_if_exists(hash))
-        return true;
-    return false;
-}
-
 Shard::StoreLedgerResult
 Shard::storeLedger(
     std::shared_ptr<Ledger const> const& srcLedger,
@@ -369,12 +329,7 @@ Shard::storeLedger(
     auto storeBatch = [&]() {
         std::uint64_t sz{0};
         for (auto const& nodeObject : batch)
-        {
-            pCache_->canonicalize_replace_cache(
-                nodeObject->getHash(), nodeObject);
-            nCache_->erase(nodeObject->getHash());
             sz += nodeObject->getData().size();
-        }
 
         try
         {
@@ -530,38 +485,7 @@ Shard::containsLedger(std::uint32_t ledgerSeq) const
 void
 Shard::sweep()
 {
-    boost::optional<Shard::Count> scopedCount;
-    {
-        std::lock_guard lock(mutex_);
-        if (!backend_ || !backend_->isOpen())
-        {
-            JLOG(j_.error()) << "shard " << index_ << " not initialized";
-            return;
-        }
-        scopedCount.emplace(&backendCount_);
-    }
-
-    pCache_->sweep();
-    nCache_->sweep();
-}
-
-int
-Shard::getDesiredAsyncReadCount()
-{
-    auto const scopedCount{makeBackendCount()};
-    if (!scopedCount)
-        return 0;
-
-    return pCache_->getTargetSize() / asyncDivider;
-}
-
-float
-Shard::getCacheHitRate()
-{
-    auto const scopedCount{makeBackendCount()};
-    if (!scopedCount)
-        return 0;
-
-    return pCache_->getHitRate();
+    // nothing to do
 }
std::chrono::steady_clock::time_point std::chrono::steady_clock::time_point
@@ -712,8 +636,6 @@ Shard::finalize(
     auto const treeNodeCache{shardFamily.getTreeNodeCache(lastSeq_)};
 
     // Reset caches to reduce memory usage
-    pCache_->reset();
-    nCache_->reset();
     fullBelowCache->reset();
     treeNodeCache->reset();
@@ -767,8 +689,6 @@ Shard::finalize(
         next = std::move(ledger);
         --ledgerSeq;
 
-        pCache_->reset();
-        nCache_->reset();
         fullBelowCache->reset();
         treeNodeCache->reset();
     }
@@ -859,9 +779,6 @@ Shard::open(std::lock_guard<std::mutex> const& lock)
         txSQLiteDB_.reset();
         acquireInfo_.reset();
 
-        pCache_.reset();
-        nCache_.reset();
-
         state_ = acquire;
 
         if (!preexist)
@@ -982,14 +899,6 @@ Shard::open(std::lock_guard<std::mutex> const& lock)
             ". Error: " + e.what());
     }
 
-    // Set backend caches
-    auto const size{config.getValueFor(SizedItem::nodeCacheSize, 0)};
-    auto const age{
-        std::chrono::seconds{config.getValueFor(SizedItem::nodeCacheAge, 0)}};
-    auto const name{"shard " + std::to_string(index_)};
-    pCache_ = std::make_unique<PCache>(name, size, age, stopwatch(), j_);
-    nCache_ = std::make_unique<NCache>(name, stopwatch(), size, age);
-
     if (!initSQLite(lock))
         return fail({});

View File

@@ -115,11 +115,6 @@ public:
     [[nodiscard]] std::shared_ptr<NodeObject>
     fetchNodeObject(uint256 const& hash, FetchReport& fetchReport);
 
-    [[nodiscard]] bool
-    fetchNodeObjectFromCache(
-        uint256 const& hash,
-        std::shared_ptr<NodeObject>& nodeObject);
-
     /** Store a ledger.
 
         @param srcLedger The ledger to store.
@@ -159,12 +154,6 @@ public:
         return dir_;
     }
 
-    [[nodiscard]] int
-    getDesiredAsyncReadCount();
-
-    [[nodiscard]] float
-    getCacheHitRate();
-
     [[nodiscard]] std::chrono::steady_clock::time_point
     getLastUse() const;
 
@@ -278,12 +267,6 @@ private:
     // The earliest shard may store fewer ledgers than subsequent shards
     std::uint32_t const maxLedgers_;
 
-    // Database positive cache
-    std::unique_ptr<PCache> pCache_;
-
-    // Database negative cache
-    std::unique_ptr<NCache> nCache_;
-
     // Path to database files
     boost::filesystem::path const dir_;

View File

@@ -1,41 +0,0 @@
-//------------------------------------------------------------------------------
-/*
-    This file is part of rippled: https://github.com/ripple/rippled
-    Copyright (c) 2012, 2013 Ripple Labs Inc.
-
-    Permission to use, copy, modify, and/or distribute this software for any
-    purpose with or without fee is hereby granted, provided that the above
-    copyright notice and this permission notice appear in all copies.
-
-    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-//==============================================================================
-
-#ifndef RIPPLE_NODESTORE_TUNING_H_INCLUDED
-#define RIPPLE_NODESTORE_TUNING_H_INCLUDED
-
-namespace ripple {
-namespace NodeStore {
-
-enum {
-    // Target cache size of the TaggedCache used to hold nodes
-    cacheTargetSize = 16384
-
-    // Fraction of the cache one query source can take
-    ,
-    asyncDivider = 8
-};
-
-// Expiration time for cached nodes
-std::chrono::seconds constexpr cacheTargetAge = std::chrono::minutes{5};
-
-}  // namespace NodeStore
-}  // namespace ripple
-
-#endif

View File

@@ -378,7 +378,6 @@ JSS(no_ripple); // out: AccountLines
 JSS(no_ripple_peer);    // out: AccountLines
 JSS(node);              // out: LedgerEntry
 JSS(node_binary);       // out: LedgerEntry
-JSS(node_hit_rate);     // out: GetCounts
 JSS(node_read_bytes);   // out: GetCounts
 JSS(node_reads_hit);    // out: GetCounts
 JSS(node_reads_total);  // out: GetCounts

View File

@@ -99,7 +99,6 @@ getCountsJson(Application& app, int minObjectCount)
     ret[jss::historical_perminute] =
         static_cast<int>(app.getInboundLedgers().fetchRate());
     ret[jss::SLE_hit_rate] = app.cachedSLEs().rate();
-    ret[jss::node_hit_rate] = app.getNodeStore().getCacheHitRate();
     ret[jss::ledger_hit_rate] = app.getLedgerMaster().getCacheHitRate();
     ret[jss::AL_hit_rate] = app.getAcceptedLedgerCache().getHitRate();
 
@@ -137,7 +136,6 @@ getCountsJson(Application& app, int minObjectCount)
         jv[jss::treenode_cache_size] = cacheSz;
         jv[jss::treenode_track_size] = trackSz;
         ret[jss::write_load] = shardStore->getWriteLoad();
-        ret[jss::node_hit_rate] = shardStore->getCacheHitRate();
         jv[jss::node_writes] = std::to_string(shardStore->getStoreCount());
         jv[jss::node_reads_total] = shardStore->getFetchTotalCount();
         jv[jss::node_reads_hit] = shardStore->getFetchHitCount();

View File

@@ -392,12 +392,16 @@ private:
     descendThrow(std::shared_ptr<SHAMapInnerNode> const&, int branch) const;
 
     // Descend with filter
+    // If pending, callback is called as if it called fetchNodeNT
+    using descendCallback =
+        std::function<void(std::shared_ptr<SHAMapTreeNode>, SHAMapHash const&)>;
     SHAMapTreeNode*
     descendAsync(
         SHAMapInnerNode* parent,
         int branch,
         SHAMapSyncFilter* filter,
-        bool& pending) const;
+        bool& pending,
+        descendCallback&&) const;
 
     std::pair<SHAMapTreeNode*, SHAMapNodeID>
     descend(
@@ -468,9 +472,17 @@ private:
         // such as std::vector, can't be used here.
         std::stack<StackEntry, std::deque<StackEntry>> stack_;
 
-        // nodes we may acquire from deferred reads
-        std::vector<std::tuple<SHAMapInnerNode*, SHAMapNodeID, int>>
-            deferredReads_;
+        // nodes we may have acquired from deferred reads
+        using DeferredNode = std::tuple<
+            SHAMapInnerNode*,                  // parent node
+            SHAMapNodeID,                      // parent node ID
+            int,                               // branch
+            std::shared_ptr<SHAMapTreeNode>>;  // node
+
+        int deferred_;
+        std::mutex deferLock_;
+        std::condition_variable deferCondVar_;
+        std::vector<DeferredNode> finishedReads_;
 
         // nodes we need to resume after we get their children from deferred
        // reads
@@ -485,9 +497,10 @@ private:
             , filter_(filter)
             , maxDefer_(maxDefer)
             , generation_(generation)
+            , deferred_(0)
         {
             missingNodes_.reserve(max);
-            deferredReads_.reserve(maxDefer);
+            finishedReads_.reserve(maxDefer);
         }
     };
 
@@ -496,6 +509,12 @@ private:
     gmn_ProcessNodes(MissingNodes&, MissingNodes::StackEntry& node);
     void
     gmn_ProcessDeferredReads(MissingNodes&);
 
+    // fetch from DB helper function
+    std::shared_ptr<SHAMapTreeNode>
+    finishFetch(
+        SHAMapHash const& hash,
+        std::shared_ptr<NodeObject> const& object) const;
+
 };
 
 inline void

View File

@@ -162,34 +162,41 @@ SHAMap::findKey(uint256 const& id) const
 std::shared_ptr<SHAMapTreeNode>
 SHAMap::fetchNodeFromDB(SHAMapHash const& hash) const
 {
-    std::shared_ptr<SHAMapTreeNode> node;
+    assert(backed_);
+    auto obj = f_.db().fetchNodeObject(hash.as_uint256(), ledgerSeq_);
+    return finishFetch(hash, obj);
+}
 
-    if (backed_)
+std::shared_ptr<SHAMapTreeNode>
+SHAMap::finishFetch(
+    SHAMapHash const& hash,
+    std::shared_ptr<NodeObject> const& object) const
+{
+    assert(backed_);
+
+    if (!object)
     {
-        if (auto nodeObject =
-                f_.db().fetchNodeObject(hash.as_uint256(), ledgerSeq_))
-        {
-            try
-            {
-                node = SHAMapTreeNode::makeFromPrefix(
-                    makeSlice(nodeObject->getData()), hash);
-                if (node)
-                    canonicalize(hash, node);
-            }
-            catch (std::exception const&)
-            {
-                JLOG(journal_.warn()) << "Invalid DB node " << hash;
-                return std::shared_ptr<SHAMapLeafNode>();
-            }
-        }
-        else if (full_)
+        if (full_)
         {
             full_ = false;
             f_.missingNode(ledgerSeq_);
         }
+        return {};
     }
 
-    return node;
+    std::shared_ptr<SHAMapTreeNode> node;
+    try
+    {
+        node =
+            SHAMapTreeNode::makeFromPrefix(makeSlice(object->getData()), hash);
+        if (node)
+            canonicalize(hash, node);
+        return node;
+    }
+    catch (std::exception const&)
+    {
+        JLOG(journal_.warn()) << "Invalid DB node " << hash;
+        return std::shared_ptr<SHAMapTreeNode>();
+    }
 }
 
 // See if a sync filter has a node
@@ -374,7 +381,8 @@ SHAMap::descendAsync(
     SHAMapInnerNode* parent,
     int branch,
     SHAMapSyncFilter* filter,
-    bool& pending) const
+    bool& pending,
+    descendCallback&& callback) const
 {
     pending = false;
 
@@ -392,19 +400,16 @@ SHAMap::descendAsync(
         if (!ptr && backed_)
         {
-            std::shared_ptr<NodeObject> obj;
-            if (!f_.db().asyncFetch(hash.as_uint256(), ledgerSeq_, obj))
-            {
-                pending = true;
-                return nullptr;
-            }
-            if (!obj)
-                return nullptr;
-
-            ptr =
-                SHAMapTreeNode::makeFromPrefix(makeSlice(obj->getData()), hash);
-            if (ptr && backed_)
-                canonicalize(hash, ptr);
+            f_.db().asyncFetch(
+                hash.as_uint256(),
+                ledgerSeq_,
+                [this, hash, cb{std::move(callback)}](
+                    std::shared_ptr<NodeObject> const& object) {
+                    auto node = finishFetch(hash, object);
+                    cb(node, hash);
+                });
+            pending = true;
+            return nullptr;
         }
     }

View File

@@ -196,25 +196,37 @@ SHAMap::gmn_ProcessNodes(MissingNodes& mn, MissingNodes::StackEntry& se)
                 !f_.getFullBelowCache(ledgerSeq_)
                      ->touch_if_exists(childHash.as_uint256()))
             {
-                SHAMapNodeID childID = nodeID.getChildNodeID(branch);
                 bool pending = false;
-                auto d = descendAsync(node, branch, mn.filter_, pending);
+                auto d = descendAsync(
+                    node,
+                    branch,
+                    mn.filter_,
+                    pending,
+                    [node, nodeID, branch, &mn](
+                        std::shared_ptr<SHAMapTreeNode> found,
+                        SHAMapHash const&) {
+                        // a read completed asynchronously
+                        std::unique_lock<std::mutex> lock{mn.deferLock_};
+                        mn.finishedReads_.emplace_back(
+                            node, nodeID, branch, std::move(found));
+                        mn.deferCondVar_.notify_one();
+                    });
 
-                if (!d)
+                if (pending)
                 {
-                    fullBelow = false;  // for now, not known full below
+                    fullBelow = false;
+                    ++mn.deferred_;
+                }
+                else if (!d)
+                {
+                    // node is not in database
+                    fullBelow = false;  // for now, not known full below
+                    mn.missingHashes_.insert(childHash);
+                    mn.missingNodes_.emplace_back(
+                        nodeID.getChildNodeID(branch), childHash.as_uint256());
 
-                    if (!pending)
-                    {  // node is not in the database
-                        mn.missingHashes_.insert(childHash);
-                        mn.missingNodes_.emplace_back(
-                            childID, childHash.as_uint256());
-
-                        if (--mn.max_ <= 0)
-                            return;
-                    }
-                    else
-                        mn.deferredReads_.emplace_back(node, nodeID, branch);
+                    if (--mn.max_ <= 0)
+                        return;
                 }
                 else if (
                     d->isInner() &&
@@ -224,7 +236,7 @@ SHAMap::gmn_ProcessNodes(MissingNodes& mn, MissingNodes::StackEntry& se)
 
                 // Switch to processing the child node
                 node = static_cast<SHAMapInnerNode*>(d);
-                nodeID = childID;
+                nodeID = nodeID.getChildNodeID(branch);
                 firstChild = rand_int(255);
                 currentChild = 0;
                 fullBelow = true;
@@ -253,30 +265,32 @@ SHAMap::gmn_ProcessNodes(MissingNodes& mn, MissingNodes::StackEntry& se)
 void
 SHAMap::gmn_ProcessDeferredReads(MissingNodes& mn)
 {
-    // Wait for our deferred reads to finish
-    auto const before = std::chrono::steady_clock::now();
-    f_.db().waitReads();
-    auto const after = std::chrono::steady_clock::now();
-
-    auto const elapsed =
-        std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
-    auto const count = mn.deferredReads_.size();
-
     // Process all deferred reads
-    int hits = 0;
-    for (auto const& deferredNode : mn.deferredReads_)
+    int complete = 0;
+    while (complete != mn.deferred_)
     {
+        std::tuple<
+            SHAMapInnerNode*,
+            SHAMapNodeID,
+            int,
+            std::shared_ptr<SHAMapTreeNode>>
+            deferredNode;
+        {
+            std::unique_lock<std::mutex> lock{mn.deferLock_};
+
+            while (mn.finishedReads_.size() <= complete)
+                mn.deferCondVar_.wait(lock);
+            deferredNode = std::move(mn.finishedReads_[complete++]);
+        }
+
         auto parent = std::get<0>(deferredNode);
         auto const& parentID = std::get<1>(deferredNode);
         auto branch = std::get<2>(deferredNode);
+        auto nodePtr = std::get<3>(deferredNode);
         auto const& nodeHash = parent->getChildHash(branch);
 
-        auto nodePtr = fetchNodeNT(nodeHash, mn.filter_);
         if (nodePtr)
         {  // Got the node
-            ++hits;
-            if (backed_)
-                canonicalize(nodeHash, nodePtr);
             nodePtr = parent->canonicalizeChild(branch, std::move(nodePtr));
 
             // When we finish this stack, we need to restart
@@ -287,24 +301,12 @@ SHAMap::gmn_ProcessDeferredReads(MissingNodes& mn)
         {
             mn.missingNodes_.emplace_back(
                 parentID.getChildNodeID(branch), nodeHash.as_uint256());
             --mn.max_;
         }
     }
-    mn.deferredReads_.clear();
 
-    auto const process_time =
-        std::chrono::duration_cast<std::chrono::milliseconds>(
-            std::chrono::steady_clock::now() - after);
-
-    using namespace std::chrono_literals;
-    if ((count > 50) || (elapsed > 50ms))
-    {
-        JLOG(journal_.debug())
-            << "getMissingNodes reads " << count << " nodes (" << hits
-            << " hits) in " << elapsed.count() << " + " << process_time.count()
-            << " ms";
-    }
+    mn.finishedReads_.clear();
+    mn.deferred_ = 0;
 }
/** Get a list of node IDs and hashes for nodes that are part of this SHAMap /** Get a list of node IDs and hashes for nodes that are part of this SHAMap
@@ -320,7 +322,7 @@ SHAMap::getMissingNodes(int max, SHAMapSyncFilter* filter)
     MissingNodes mn(
         max,
         filter,
-        f_.db().getDesiredAsyncReadCount(ledgerSeq_),
+        4096,  // number of async reads per pass
         f_.getFullBelowCache(ledgerSeq_)->getGeneration());
 
     if (!root_->isInner() ||
@@ -350,12 +352,12 @@ SHAMap::getMissingNodes(int max, SHAMapSyncFilter* filter)
     // Traverse the map without blocking
     do
     {
-        while ((node != nullptr) && (mn.deferredReads_.size() <= mn.maxDefer_))
+        while ((node != nullptr) && (mn.deferred_ <= mn.maxDefer_))
         {
             gmn_ProcessNodes(mn, pos);
 
             if (mn.max_ <= 0)
-                return std::move(mn.missingNodes_);
+                break;
 
             if ((node == nullptr) && !mn.stack_.empty())
             {
@@ -380,8 +382,7 @@ SHAMap::getMissingNodes(int max, SHAMapSyncFilter* filter)
 
         // We have either emptied the stack or
         // posted as many deferred reads as we can
-
-        if (!mn.deferredReads_.empty())
+        if (mn.deferred_)
             gmn_ProcessDeferredReads(mn);
 
         if (mn.max_ <= 0)