diff --git a/src/ripple/app/ledger/impl/InboundLedger.cpp b/src/ripple/app/ledger/impl/InboundLedger.cpp index fe9bc6f59..39fe90401 100644 --- a/src/ripple/app/ledger/impl/InboundLedger.cpp +++ b/src/ripple/app/ledger/impl/InboundLedger.cpp @@ -321,11 +321,11 @@ InboundLedger::tryDB(NodeStore::Database& srcDB) }; // Try to fetch the ledger header from the DB - if (auto node = srcDB.fetch(mHash, mSeq)) + if (auto nodeObject = srcDB.fetchNodeObject(mHash, mSeq)) { JLOG(m_journal.trace()) << "Ledger header found in local store"; - makeLedger(node->getData()); + makeLedger(nodeObject->getData()); if (mFailed) return; @@ -333,7 +333,7 @@ InboundLedger::tryDB(NodeStore::Database& srcDB) auto& dstDB{mLedger->stateMap().family().db()}; if (std::addressof(dstDB) != std::addressof(srcDB)) { - Blob blob{node->getData()}; + Blob blob{nodeObject->getData()}; dstDB.store( hotLEDGER, std::move(blob), mHash, mLedger->info().seq); } diff --git a/src/ripple/app/ledger/impl/LedgerMaster.cpp b/src/ripple/app/ledger/impl/LedgerMaster.cpp index 4b07d7d9e..1ac7bb122 100644 --- a/src/ripple/app/ledger/impl/LedgerMaster.cpp +++ b/src/ripple/app/ledger/impl/LedgerMaster.cpp @@ -703,7 +703,7 @@ LedgerMaster::tryFill(Job& job, std::shared_ptr ledger) if (it == ledgerHashes.end()) break; - if (!nodeStore.fetch( + if (!nodeStore.fetchNodeObject( ledgerHashes.begin()->second.first, ledgerHashes.begin()->first)) { @@ -1572,10 +1572,11 @@ LedgerMaster::getCloseTimeByHash( LedgerHash const& ledgerHash, std::uint32_t index) { - auto node = app_.getNodeStore().fetch(ledgerHash, index); - if (node && (node->getData().size() >= 120)) + auto nodeObject = app_.getNodeStore().fetchNodeObject(ledgerHash, index); + if (nodeObject && (nodeObject->getData().size() >= 120)) { - SerialIter it(node->getData().data(), node->getData().size()); + SerialIter it( + nodeObject->getData().data(), nodeObject->getData().size()); if (safe_cast(it.get32()) == HashPrefix::ledgerMaster) { it.skip( diff --git 
a/src/ripple/app/main/Application.cpp b/src/ripple/app/main/Application.cpp index 2094d5194..b9acd63e7 100644 --- a/src/ripple/app/main/Application.cpp +++ b/src/ripple/app/main/Application.cpp @@ -1241,8 +1241,7 @@ private: bool nodeToShards(); - bool - validateShards(); + void startGenesisLedger(); @@ -1476,12 +1475,9 @@ ApplicationImp::setup() if (!config_->standalone()) { - // validation and node import require the sqlite db + // NodeStore import into the ShardStore requires the SQLite database if (config_->nodeToShard && !nodeToShards()) return false; - - if (config_->validateShards && !validateShards()) - return false; } validatorSites_->start(); @@ -2173,27 +2169,6 @@ ApplicationImp::nodeToShards() return true; } -bool -ApplicationImp::validateShards() -{ - assert(overlay_); - assert(!config_->standalone()); - - if (config_->section(ConfigSection::shardDatabase()).empty()) - { - JLOG(m_journal.fatal()) - << "The [shard_db] configuration setting must be set"; - return false; - } - if (!shardStore_) - { - JLOG(m_journal.fatal()) << "Invalid [shard_db] configuration"; - return false; - } - shardStore_->validate(); - return true; -} - void ApplicationImp::setMaxDisallowedLedger() { diff --git a/src/ripple/app/main/Main.cpp b/src/ripple/app/main/Main.cpp index b7c569e92..6b767812d 100644 --- a/src/ripple/app/main/Main.cpp +++ b/src/ripple/app/main/Main.cpp @@ -353,12 +353,6 @@ run(int argc, char** argv) importText += ConfigSection::nodeDatabase(); importText += "] configuration file section)."; } - std::string shardsText; - { - shardsText += "Validate an existing shard database (specified in the ["; - shardsText += ConfigSection::shardDatabase(); - shardsText += "] configuration file section)."; - } // Set up option parsing. 
// @@ -388,8 +382,7 @@ run(int argc, char** argv) "replay", "Replay a ledger close.")( "start", "Start from a fresh Ledger.")( "vacuum", "VACUUM the transaction db.")( - "valid", "Consider the initial ledger a valid network ledger.")( - "validateShards", shardsText.c_str()); + "valid", "Consider the initial ledger a valid network ledger."); po::options_description rpc("RPC Client Options"); rpc.add_options()( @@ -611,9 +604,6 @@ run(int argc, char** argv) if (vm.count("nodetoshard")) config->nodeToShard = true; - if (vm.count("validateShards")) - config->validateShards = true; - if (vm.count("ledger")) { config->START_LEDGER = vm["ledger"].as(); diff --git a/src/ripple/app/main/NodeStoreScheduler.cpp b/src/ripple/app/main/NodeStoreScheduler.cpp index dd58b08e7..379ecb8b6 100644 --- a/src/ripple/app/main/NodeStoreScheduler.cpp +++ b/src/ripple/app/main/NodeStoreScheduler.cpp @@ -80,7 +80,8 @@ NodeStoreScheduler::onFetch(NodeStore::FetchReport const& report) { if (report.wentToDisk) m_jobQueue->addLoadEvents( - report.isAsync ? jtNS_ASYNC_READ : jtNS_SYNC_READ, + report.fetchType == NodeStore::FetchType::async ? 
jtNS_ASYNC_READ + : jtNS_SYNC_READ, 1, report.elapsed); } diff --git a/src/ripple/app/misc/SHAMapStoreImp.cpp b/src/ripple/app/misc/SHAMapStoreImp.cpp index da1c29452..642593be5 100644 --- a/src/ripple/app/misc/SHAMapStoreImp.cpp +++ b/src/ripple/app/misc/SHAMapStoreImp.cpp @@ -302,7 +302,8 @@ SHAMapStoreImp::copyNode( SHAMapAbstractNode const& node) { // Copy a single record from node to dbRotating_ - dbRotating_->fetch(node.getNodeHash().as_uint256(), node.getSeq()); + dbRotating_->fetchNodeObject( + node.getNodeHash().as_uint256(), node.getSeq()); if (!(++nodeCount % checkHealthInterval_)) { if (health()) diff --git a/src/ripple/app/misc/SHAMapStoreImp.h b/src/ripple/app/misc/SHAMapStoreImp.h index 6145cb48d..7200f7eba 100644 --- a/src/ripple/app/misc/SHAMapStoreImp.h +++ b/src/ripple/app/misc/SHAMapStoreImp.h @@ -211,7 +211,7 @@ private: for (auto const& key : cache.getKeys()) { - dbRotating_->fetch(key, 0); + dbRotating_->fetchNodeObject(key, 0); if (!(++check % checkHealthInterval_) && health()) return true; } diff --git a/src/ripple/core/Config.h b/src/ripple/core/Config.h index c885dacee..2915f1415 100644 --- a/src/ripple/core/Config.h +++ b/src/ripple/core/Config.h @@ -56,7 +56,8 @@ enum class SizedItem : std::size_t { nodeCacheAge, hashNodeDBCache, txnDBCache, - lgrDBCache + lgrDBCache, + openFinalLimit }; // This entire derived class is deprecated. @@ -113,7 +114,6 @@ private: public: bool doImport = false; bool nodeToShard = false; - bool validateShards = false; bool ELB_SUPPORT = false; std::vector IPS; // Peer IPs from rippled.cfg. 
diff --git a/src/ripple/core/impl/Config.cpp b/src/ripple/core/impl/Config.cpp index d8c88bec6..b42947b53 100644 --- a/src/ripple/core/impl/Config.cpp +++ b/src/ripple/core/impl/Config.cpp @@ -40,24 +40,25 @@ namespace ripple { -inline constexpr std::array>, 11> - sizedItems{{ - // FIXME: We should document each of these items, explaining exactly - // what - // they control and whether there exists an explicit config - // option that can be used to override the default. - {SizedItem::sweepInterval, {{10, 30, 60, 90, 120}}}, - {SizedItem::treeCacheSize, {{128000, 256000, 512000, 768000, 2048000}}}, - {SizedItem::treeCacheAge, {{30, 60, 90, 120, 900}}}, - {SizedItem::ledgerSize, {{32, 128, 256, 384, 768}}}, - {SizedItem::ledgerAge, {{30, 90, 180, 240, 900}}}, - {SizedItem::ledgerFetch, {{2, 3, 4, 5, 8}}}, - {SizedItem::nodeCacheSize, {{16384, 32768, 131072, 262144, 524288}}}, - {SizedItem::nodeCacheAge, {{60, 90, 120, 900, 1800}}}, - {SizedItem::hashNodeDBCache, {{4, 12, 24, 64, 128}}}, - {SizedItem::txnDBCache, {{4, 12, 24, 64, 128}}}, - {SizedItem::lgrDBCache, {{4, 8, 16, 32, 128}}}, - }}; +inline constexpr std::array>, 12> + sizedItems{ + {// FIXME: We should document each of these items, explaining exactly + // what + // they control and whether there exists an explicit config + // option that can be used to override the default. 
+ {SizedItem::sweepInterval, {{10, 30, 60, 90, 120}}}, + {SizedItem::treeCacheSize, + {{128000, 256000, 512000, 768000, 2048000}}}, + {SizedItem::treeCacheAge, {{30, 60, 90, 120, 900}}}, + {SizedItem::ledgerSize, {{32, 128, 256, 384, 768}}}, + {SizedItem::ledgerAge, {{30, 90, 180, 240, 900}}}, + {SizedItem::ledgerFetch, {{2, 3, 4, 5, 8}}}, + {SizedItem::nodeCacheSize, {{16384, 32768, 131072, 262144, 524288}}}, + {SizedItem::nodeCacheAge, {{60, 90, 120, 900, 1800}}}, + {SizedItem::hashNodeDBCache, {{4, 12, 24, 64, 128}}}, + {SizedItem::txnDBCache, {{4, 12, 24, 64, 128}}}, + {SizedItem::lgrDBCache, {{4, 8, 16, 32, 128}}}, + {SizedItem::openFinalLimit, {{8, 16, 32, 64, 128}}}}}; // Ensure that the order of entries in the table corresponds to the // order of entries in the enum: diff --git a/src/ripple/nodestore/Backend.h b/src/ripple/nodestore/Backend.h index d3abc3314..070e1038e 100644 --- a/src/ripple/nodestore/Backend.h +++ b/src/ripple/nodestore/Backend.h @@ -58,6 +58,11 @@ public: virtual void open(bool createIfMissing = true) = 0; + /** Returns true if the database is open. + */ + virtual bool + isOpen() = 0; + /** Close the backend. This allows the caller to catch exceptions. */ diff --git a/src/ripple/nodestore/Database.h b/src/ripple/nodestore/Database.h index a82d7a700..b1a788843 100644 --- a/src/ripple/nodestore/Database.h +++ b/src/ripple/nodestore/Database.h @@ -103,7 +103,7 @@ public: @param data The payload of the object. The caller's variable is overwritten. @param hash The 256-bit hash of the payload data. - @param seq The sequence of the ledger the object belongs to. + @param ledgerSeq The sequence of the ledger the object belongs to. @return `true` if the object was stored? */ @@ -112,20 +112,24 @@ public: NodeObjectType type, Blob&& data, uint256 const& hash, - std::uint32_t seq) = 0; + std::uint32_t ledgerSeq) = 0; - /** Fetch an object. + /** Fetch a node object.
If the object is known to be not in the database, isn't found in the database during the fetch, or failed to load correctly during the fetch, `nullptr` is returned. @note This can be called concurrently. @param hash The key of the object to retrieve. - @param seq The sequence of the ledger where the object is stored. + @param ledgerSeq The sequence of the ledger where the object is stored. + @param fetchType the type of fetch, synchronous or asynchronous. @return The object, or nullptr if it couldn't be retrieved. */ - virtual std::shared_ptr - fetch(uint256 const& hash, std::uint32_t seq) = 0; + std::shared_ptr + fetchNodeObject( + uint256 const& hash, + std::uint32_t ledgerSeq, + FetchType fetchType = FetchType::synchronous); /** Fetch an object without waiting. If I/O is required to determine whether or not the object is present, @@ -135,19 +139,19 @@ public: @note This can be called concurrently. @param hash The key of the object to retrieve - @param seq The sequence of the ledger where the object is stored. - @param object The object retrieved + @param ledgerSeq The sequence of the ledger where the object is stored. + @param nodeObject The object retrieved @return Whether the operation completed */ virtual bool asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& object) = 0; + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) = 0; - /** Copies a ledger stored in a different database to this one. + /** Store a ledger from a different database. - @param ledger The ledger to copy. + @param srcLedger The ledger to store. @return true if the operation was successful */ virtual bool @@ -160,12 +164,12 @@ public: /** Get the maximum number of async reads the node store prefers. - @param seq A ledger sequence specifying a shard to query. + @param ledgerSeq A ledger sequence specifying a shard to query. @return The number of async reads preferred. @note The sequence is only used with the shard store. 
*/ virtual int - getDesiredAsyncReadCount(std::uint32_t seq) = 0; + getDesiredAsyncReadCount(std::uint32_t ledgerSeq) = 0; /** Get the positive cache hits to total attempts ratio. */ virtual float @@ -187,7 +191,7 @@ public: @return The total read and written bytes. */ - std::uint32_t + std::uint64_t getStoreCount() const { return storeCount_; @@ -205,7 +209,7 @@ public: return fetchHitCount_; } - std::uint32_t + std::uint64_t getStoreSize() const { return storeSz_; @@ -243,68 +247,47 @@ protected: Scheduler& scheduler_; int fdRequired_{0}; - void - stopThreads(); + std::atomic fetchHitCount_{0}; + std::atomic fetchSz_{0}; void - storeStats(size_t sz) + stopReadThreads(); + + void + storeStats(std::uint64_t count, std::uint64_t sz) { - ++storeCount_; + assert(count <= sz); + storeCount_ += count; storeSz_ += sz; } // Called by the public asyncFetch function void - asyncFetch( - uint256 const& hash, - std::uint32_t seq, - std::shared_ptr> const& pCache, - std::shared_ptr> const& nCache); - - // Called by the public fetch function - std::shared_ptr - fetchInternal(uint256 const& hash, std::shared_ptr backend); + asyncFetch(uint256 const& hash, std::uint32_t ledgerSeq); // Called by the public import function void importInternal(Backend& dstBackend, Database& srcDB); - std::shared_ptr - doFetch( - uint256 const& hash, - std::uint32_t seq, - TaggedCache& pCache, - KeyCache& nCache, - bool isAsync); - // Called by the public storeLedger function bool storeLedger( Ledger const& srcLedger, std::shared_ptr dstBackend, std::shared_ptr> dstPCache, - std::shared_ptr> dstNCache, - std::shared_ptr next); + std::shared_ptr> dstNCache); private: - std::atomic storeCount_{0}; - std::atomic fetchTotalCount_{0}; - std::atomic fetchHitCount_{0}; - std::atomic storeSz_{0}; - std::atomic fetchSz_{0}; + std::atomic storeCount_{0}; + std::atomic storeSz_{0}; + std::atomic fetchTotalCount_{0}; std::mutex readLock_; std::condition_variable readCondVar_; std::condition_variable 
readGenCondVar_; // reads to do - std::map< - uint256, - std::tuple< - std::uint32_t, - std::weak_ptr>, - std::weak_ptr>>> - read_; + std::map read_; // last read uint256 readLastHash_; @@ -320,7 +303,10 @@ private: std::uint32_t const earliestLedgerSeq_; virtual std::shared_ptr - fetchFrom(uint256 const& hash, std::uint32_t seq) = 0; + fetchNodeObject( + uint256 const& hash, + std::uint32_t ledgerSeq, + FetchReport& fetchReport) = 0; /** Visit every object in the database This is usually called during import. diff --git a/src/ripple/nodestore/DatabaseShard.h b/src/ripple/nodestore/DatabaseShard.h index 3d0860272..1f3f92530 100644 --- a/src/ripple/nodestore/DatabaseShard.h +++ b/src/ripple/nodestore/DatabaseShard.h @@ -66,9 +66,9 @@ public: /** Prepare to store a new ledger in the shard being acquired - @param validLedgerSeq The index of the maximum valid ledgers + @param validLedgerSeq The sequence of the maximum valid ledgers @return If a ledger should be fetched and stored, then returns the - ledger index of the ledger to request. Otherwise returns boost::none. + ledger sequence of the ledger to request. Otherwise returns boost::none. Some reasons this may return boost::none are: all shards are stored and full, max allowed disk space would be exceeded, or a ledger was recently requested and not enough time has passed @@ -136,13 +136,6 @@ public: virtual std::string getCompleteShards() = 0; - /** Verifies shard store data is valid. 
- - @param app The application object - */ - virtual void - validate() = 0; - /** @return The maximum number of ledgers stored in a shard */ virtual std::uint32_t @@ -188,10 +181,10 @@ public: constexpr std::uint32_t seqToShardIndex( - std::uint32_t seq, + std::uint32_t ledgerSeq, std::uint32_t ledgersPerShard = DatabaseShard::ledgersPerShardDefault) { - return (seq - 1) / ledgersPerShard; + return (ledgerSeq - 1) / ledgersPerShard; } extern std::unique_ptr diff --git a/src/ripple/nodestore/Scheduler.h b/src/ripple/nodestore/Scheduler.h index 9d12f6d2d..03f6e185b 100644 --- a/src/ripple/nodestore/Scheduler.h +++ b/src/ripple/nodestore/Scheduler.h @@ -26,15 +26,19 @@ namespace ripple { namespace NodeStore { +enum class FetchType { synchronous, async }; + /** Contains information about a fetch operation. */ struct FetchReport { - explicit FetchReport() = default; + explicit FetchReport(FetchType fetchType_) : fetchType(fetchType_) + { + } std::chrono::milliseconds elapsed; - bool isAsync; - bool wentToDisk; - bool wasFound; + FetchType const fetchType; + bool wentToDisk = false; + bool wasFound = false; }; /** Contains information about a batch write operation. */ diff --git a/src/ripple/nodestore/ShardPool.md b/src/ripple/nodestore/ShardPool.md new file mode 100644 index 000000000..2079feabb --- /dev/null +++ b/src/ripple/nodestore/ShardPool.md @@ -0,0 +1,43 @@ +# Open Shard Management + +## Overview + +Shard NuDB and SQLite databases consume server resources. This can be unnecessarily taxing on servers with many shards. The open shard management feature aims to improve the situation by managing a limited number of open shard database connections. The feature, which is integrated into the existing DatabaseShardImp and Shard classes, maintains a limited pool of open databases prioritized by their last use time stamp. The following sections describe the feature in greater detail. 
+ +### Open Shard Management + +The open shard management feature is integrated into the DatabaseShardImp and Shard classes. As the DatabaseShardImp sweep function is periodically called, the number of finalized open shards, which constitute the open pool, is examined. When the pool exceeds its limit, an attempt is made to close enough open shards to bring the pool back within the limit. Shards to be closed are selected based on their last-use timestamp, which is automatically updated on database access. If necessary, shards will automatically open their databases when accessed. + +```C++ + if (openFinals.size() > openFinalLimit_) + { + // Try to close enough shards to be within the limit. + // Sort on largest elapsed time since last use. + std::sort( + openFinals.begin(), + openFinals.end(), + [&](std::shared_ptr const& lhsShard, + std::shared_ptr const& rhsShard) { + return lhsShard->getLastUse() > rhsShard->getLastUse(); + }); + + for (auto it{openFinals.cbegin()}; + it != openFinals.cend() && openFinals.size() > openFinalLimit_;) + { + if ((*it)->tryClose()) + it = openFinals.erase(it); + else + ++it; + } + } +``` + +### Shard + +When closing an open shard, DatabaseShardImp will call the Shard 'tryClose' function. This function will only close the shard databases if there are no outstanding references. + +DatabaseShardImp will use the Shard 'isOpen' function to determine the state of a shard's database. + +### Caveats + +The Shard class must check the state of its databases before use. Previously, the databases were assumed to always be open; that is no longer the case with the open shard management feature.
diff --git a/src/ripple/nodestore/backend/MemoryFactory.cpp b/src/ripple/nodestore/backend/MemoryFactory.cpp index a71343ef3..e3b9937b0 100644 --- a/src/ripple/nodestore/backend/MemoryFactory.cpp +++ b/src/ripple/nodestore/backend/MemoryFactory.cpp @@ -114,6 +114,12 @@ public: db_ = &memoryFactory.open(name_); } + bool + isOpen() override + { + return static_cast(db_); + } + void close() override { diff --git a/src/ripple/nodestore/backend/NuDBFactory.cpp b/src/ripple/nodestore/backend/NuDBFactory.cpp index 8147f218c..095634109 100644 --- a/src/ripple/nodestore/backend/NuDBFactory.cpp +++ b/src/ripple/nodestore/backend/NuDBFactory.cpp @@ -132,6 +132,12 @@ public: Throw("nodestore: unknown appnum"); } + bool + isOpen() override + { + return db_.is_open(); + } + void close() override { diff --git a/src/ripple/nodestore/backend/NullFactory.cpp b/src/ripple/nodestore/backend/NullFactory.cpp index 2624fa970..eddce54a7 100644 --- a/src/ripple/nodestore/backend/NullFactory.cpp +++ b/src/ripple/nodestore/backend/NullFactory.cpp @@ -43,6 +43,12 @@ public: { } + bool + isOpen() override + { + return false; + } + void close() override { diff --git a/src/ripple/nodestore/backend/RocksDBFactory.cpp b/src/ripple/nodestore/backend/RocksDBFactory.cpp index 87ef712cd..b5f5627a9 100644 --- a/src/ripple/nodestore/backend/RocksDBFactory.cpp +++ b/src/ripple/nodestore/backend/RocksDBFactory.cpp @@ -226,6 +226,12 @@ public: m_db.reset(db); } + bool + isOpen() override + { + return static_cast(m_db); + } + void close() override { diff --git a/src/ripple/nodestore/impl/Database.cpp b/src/ripple/nodestore/impl/Database.cpp index 1f1ca0152..178d4bb7f 100644 --- a/src/ripple/nodestore/impl/Database.cpp +++ b/src/ripple/nodestore/impl/Database.cpp @@ -49,12 +49,12 @@ Database::Database( Database::~Database() { // NOTE! - // Any derived class should call the stopThreads() method in its + // Any derived class should call the stopReadThreads() method in its // destructor. 
Otherwise, occasionally, the derived class may // crash during shutdown when its members are accessed by one of // these threads after the derived class is destroyed but before // this base class is destroyed. - stopThreads(); + stopReadThreads(); } void @@ -80,7 +80,7 @@ Database::onStop() { // After stop time we can no longer use the JobQueue for background // reads. Join the background read threads. - stopThreads(); + stopReadThreads(); } void @@ -90,7 +90,7 @@ Database::onChildrenStopped() } void -Database::stopThreads() +Database::stopReadThreads() { { std::lock_guard lock(readLock_); @@ -107,123 +107,77 @@ Database::stopThreads() } void -Database::asyncFetch( - uint256 const& hash, - std::uint32_t seq, - std::shared_ptr> const& pCache, - std::shared_ptr> const& nCache) +Database::asyncFetch(uint256 const& hash, std::uint32_t ledgerSeq) { // Post a read std::lock_guard lock(readLock_); - if (read_.emplace(hash, std::make_tuple(seq, pCache, nCache)).second) + if (read_.emplace(hash, ledgerSeq).second) readCondVar_.notify_one(); } -std::shared_ptr -Database::fetchInternal(uint256 const& hash, std::shared_ptr backend) -{ - std::shared_ptr nObj; - Status status; - try - { - status = backend->fetch(hash.begin(), &nObj); - } - catch (std::exception const& e) - { - JLOG(j_.fatal()) << "Exception, " << e.what(); - Rethrow(); - } - - switch (status) - { - case ok: - ++fetchHitCount_; - if (nObj) - fetchSz_ += nObj->getData().size(); - break; - case notFound: - break; - case dataCorrupt: - // VFALCO TODO Deal with encountering corrupt data! 
- JLOG(j_.fatal()) << "Corrupt NodeObject #" << hash; - break; - default: - JLOG(j_.warn()) << "Unknown status=" << status; - break; - } - return nObj; -} - void Database::importInternal(Backend& dstBackend, Database& srcDB) { - Batch b; - b.reserve(batchWritePreallocationSize); - srcDB.for_each([&](std::shared_ptr nObj) { - assert(nObj); - if (!nObj) // This should never happen + Batch batch; + batch.reserve(batchWritePreallocationSize); + auto storeBatch = [&]() { + try + { + dstBackend.storeBatch(batch); + } + catch (std::exception const& e) + { + JLOG(j_.error()) << "Exception caught in function " << __func__ + << ". Error: " << e.what(); + return; + } + + std::uint64_t sz{0}; + for (auto const& nodeObject : batch) + sz += nodeObject->getData().size(); + storeStats(batch.size(), sz); + batch.clear(); + }; + + srcDB.for_each([&](std::shared_ptr nodeObject) { + assert(nodeObject); + if (!nodeObject) // This should never happen return; - ++storeCount_; - storeSz_ += nObj->getData().size(); - - b.push_back(nObj); - if (b.size() >= batchWritePreallocationSize) - { - dstBackend.storeBatch(b); - b.clear(); - b.reserve(batchWritePreallocationSize); - } + batch.emplace_back(std::move(nodeObject)); + if (batch.size() >= batchWritePreallocationSize) + storeBatch(); }); - if (!b.empty()) - dstBackend.storeBatch(b); + + if (!batch.empty()) + storeBatch(); } // Perform a fetch and report the time it took std::shared_ptr -Database::doFetch( +Database::fetchNodeObject( uint256 const& hash, - std::uint32_t seq, - TaggedCache& pCache, - KeyCache& nCache, - bool isAsync) + std::uint32_t ledgerSeq, + FetchType fetchType) { - FetchReport report; - report.isAsync = isAsync; - report.wentToDisk = false; + FetchReport fetchReport(fetchType); using namespace std::chrono; - auto const before = steady_clock::now(); + auto const begin{steady_clock::now()}; - // See if the object already exists in the cache - auto nObj = pCache.fetch(hash); - if (!nObj && !nCache.touch_if_exists(hash)) + 
auto nodeObject{fetchNodeObject(hash, ledgerSeq, fetchReport)}; + if (nodeObject) { - // Try the database(s) - report.wentToDisk = true; - nObj = fetchFrom(hash, seq); - ++fetchTotalCount_; - if (!nObj) - { - // Just in case a write occurred - nObj = pCache.fetch(hash); - if (!nObj) - // We give up - nCache.insert(hash); - } - else - { - // Ensure all threads get the same object - pCache.canonicalize_replace_client(hash, nObj); - - // Since this was a 'hard' fetch, we will log it. - JLOG(j_.trace()) << "HOS: " << hash << " fetch: in db"; - } + ++fetchHitCount_; + fetchSz_ += nodeObject->getData().size(); } - report.wasFound = static_cast(nObj); - report.elapsed = duration_cast(steady_clock::now() - before); - scheduler_.onFetch(report); - return nObj; + if (fetchReport.wentToDisk) + ++fetchTotalCount_; + + fetchReport.elapsed = + duration_cast(steady_clock::now() - begin); + scheduler_.onFetch(fetchReport); + return nodeObject; } bool @@ -231,58 +185,52 @@ Database::storeLedger( Ledger const& srcLedger, std::shared_ptr dstBackend, std::shared_ptr> dstPCache, - std::shared_ptr> dstNCache, - std::shared_ptr next) + std::shared_ptr> dstNCache) { - assert(static_cast(dstPCache) == static_cast(dstNCache)); - if (srcLedger.info().hash.isZero() || srcLedger.info().accountHash.isZero()) - { - assert(false); - JLOG(j_.error()) << "source ledger seq " << srcLedger.info().seq - << " is invalid"; + auto fail = [&](std::string const& msg) { + JLOG(j_.error()) << "Source ledger sequence " << srcLedger.info().seq + << ". 
" << msg; return false; - } + }; + + if (!dstPCache || !dstNCache) + return fail("Invalid destination cache"); + if (srcLedger.info().hash.isZero()) + return fail("Invalid hash"); + if (srcLedger.info().accountHash.isZero()) + return fail("Invalid account hash"); + auto& srcDB = const_cast(srcLedger.stateMap().family().db()); if (&srcDB == this) - { - assert(false); - JLOG(j_.error()) << "source and destination databases are the same"; - return false; - } + return fail("Source and destination databases are the same"); Batch batch; batch.reserve(batchWritePreallocationSize); auto storeBatch = [&]() { - if (dstPCache && dstNCache) + std::uint64_t sz{0}; + for (auto const& nodeObject : batch) { - for (auto& nObj : batch) - { - dstPCache->canonicalize_replace_cache(nObj->getHash(), nObj); - dstNCache->erase(nObj->getHash()); - storeStats(nObj->getData().size()); - } + dstPCache->canonicalize_replace_cache( + nodeObject->getHash(), nodeObject); + dstNCache->erase(nodeObject->getHash()); + sz += nodeObject->getData().size(); } - dstBackend->storeBatch(batch); + + try + { + dstBackend->storeBatch(batch); + } + catch (std::exception const& e) + { + fail( + std::string("Exception caught in function ") + __func__ + + ". 
Error: " + e.what()); + return false; + } + + storeStats(batch.size(), sz); batch.clear(); - batch.reserve(batchWritePreallocationSize); - }; - bool error = false; - auto visit = [&](SHAMapAbstractNode& node) { - if (auto nObj = srcDB.fetch( - node.getNodeHash().as_uint256(), srcLedger.info().seq)) - { - batch.emplace_back(std::move(nObj)); - if (batch.size() < batchWritePreallocationSize) - return true; - - storeBatch(); - - if (!isStopping()) - return true; - } - - error = true; - return false; + return true; }; // Store ledger header @@ -295,43 +243,48 @@ Database::storeLedger( batch.emplace_back(std::move(nObj)); } + bool error = false; + auto visit = [&](SHAMapAbstractNode& node) { + if (!isStopping()) + { + if (auto nodeObject = srcDB.fetchNodeObject( + node.getNodeHash().as_uint256(), srcLedger.info().seq)) + { + batch.emplace_back(std::move(nodeObject)); + if (batch.size() < batchWritePreallocationSize || storeBatch()) + return true; + } + } + + error = true; + return false; + }; + // Store the state map if (srcLedger.stateMap().getHash().isNonZero()) { if (!srcLedger.stateMap().isValid()) - { - JLOG(j_.error()) << "source ledger seq " << srcLedger.info().seq - << " state map invalid"; - return false; - } - if (next && next->info().parentHash == srcLedger.info().hash) - { - auto have = next->stateMap().snapShot(false); - srcLedger.stateMap().snapShot(false)->visitDifferences( - &(*have), visit); - } - else - srcLedger.stateMap().snapShot(false)->visitNodes(visit); + return fail("Invalid state map"); + + srcLedger.stateMap().snapShot(false)->visitNodes(visit); if (error) - return false; + return fail("Failed to store state map"); } // Store the transaction map if (srcLedger.info().txHash.isNonZero()) { if (!srcLedger.txMap().isValid()) - { - JLOG(j_.error()) << "source ledger seq " << srcLedger.info().seq - << " transaction map invalid"; - return false; - } + return fail("Invalid transaction map"); + srcLedger.txMap().snapShot(false)->visitNodes(visit); if 
(error) - return false; + return fail("Failed to store transaction map"); } - if (!batch.empty()) - storeBatch(); + if (!batch.empty() && !storeBatch()) + return fail("Failed to store"); + return true; } @@ -344,8 +297,6 @@ Database::threadEntry() { uint256 lastHash; std::uint32_t lastSeq; - std::shared_ptr> lastPcache; - std::shared_ptr> lastNcache; { std::unique_lock lock(readLock_); while (!readShut_ && read_.empty()) @@ -367,16 +318,13 @@ Database::threadEntry() readGenCondVar_.notify_all(); } lastHash = it->first; - lastSeq = std::get<0>(it->second); - lastPcache = std::get<1>(it->second).lock(); - lastNcache = std::get<2>(it->second).lock(); + lastSeq = it->second; read_.erase(it); readLastHash_ = lastHash; } // Perform the read - if (lastPcache && lastNcache) - doFetch(lastHash, lastSeq, *lastPcache, *lastNcache, true); + fetchNodeObject(lastHash, lastSeq, FetchType::async); } } diff --git a/src/ripple/nodestore/impl/DatabaseNodeImp.cpp b/src/ripple/nodestore/impl/DatabaseNodeImp.cpp index 705447191..966aa8b49 100644 --- a/src/ripple/nodestore/impl/DatabaseNodeImp.cpp +++ b/src/ripple/nodestore/impl/DatabaseNodeImp.cpp @@ -29,27 +29,28 @@ DatabaseNodeImp::store( NodeObjectType type, Blob&& data, uint256 const& hash, - std::uint32_t seq) + std::uint32_t) { auto nObj = NodeObject::createObject(type, std::move(data), hash); pCache_->canonicalize_replace_cache(hash, nObj); backend_->store(nObj); nCache_->erase(hash); - storeStats(nObj->getData().size()); + storeStats(1, nObj->getData().size()); } bool DatabaseNodeImp::asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& object) + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) { // See if the object is in cache - object = pCache_->fetch(hash); - if (object || nCache_->touch_if_exists(hash)) + nodeObject = pCache_->fetch(hash); + if (nodeObject || nCache_->touch_if_exists(hash)) return true; + // Otherwise post a read - Database::asyncFetch(hash, seq, pCache_, nCache_); + 
Database::asyncFetch(hash, ledgerSeq); return false; } @@ -69,5 +70,69 @@ DatabaseNodeImp::sweep() nCache_->sweep(); } +std::shared_ptr +DatabaseNodeImp::fetchNodeObject( + uint256 const& hash, + std::uint32_t, + FetchReport& fetchReport) +{ + // See if the node object exists in the cache + auto nodeObject{pCache_->fetch(hash)}; + if (!nodeObject && !nCache_->touch_if_exists(hash)) + { + // Try the backend + fetchReport.wentToDisk = true; + + Status status; + try + { + status = backend_->fetch(hash.data(), &nodeObject); + } + catch (std::exception const& e) + { + JLOG(j_.fatal()) << "Exception, " << e.what(); + Rethrow(); + } + + switch (status) + { + case ok: + ++fetchHitCount_; + if (nodeObject) + fetchSz_ += nodeObject->getData().size(); + break; + case notFound: + break; + case dataCorrupt: + JLOG(j_.fatal()) << "Corrupt NodeObject #" << hash; + break; + default: + JLOG(j_.warn()) << "Unknown status=" << status; + break; + } + + if (!nodeObject) + { + // Just in case a write occurred + nodeObject = pCache_->fetch(hash); + if (!nodeObject) + // We give up + nCache_->insert(hash); + } + else + { + fetchReport.wasFound = true; + + // Ensure all threads get the same object + pCache_->canonicalize_replace_client(hash, nodeObject); + + // Since this was a 'hard' fetch, we will log it + JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db"; + } + } + + return nodeObject; +} + } // namespace NodeStore } // namespace ripple diff --git a/src/ripple/nodestore/impl/DatabaseNodeImp.h b/src/ripple/nodestore/impl/DatabaseNodeImp.h index 2e9525e96..1ca149cf2 100644 --- a/src/ripple/nodestore/impl/DatabaseNodeImp.h +++ b/src/ripple/nodestore/impl/DatabaseNodeImp.h @@ -62,8 +62,8 @@ public: ~DatabaseNodeImp() override { - // Stop threads before data members are destroyed. 
- stopThreads(); + // Stop read threads in base before data members are destroyed + stopReadThreads(); } std::string @@ -85,33 +85,22 @@ public: } void - store( - NodeObjectType type, - Blob&& data, - uint256 const& hash, - std::uint32_t seq) override; - - std::shared_ptr - fetch(uint256 const& hash, std::uint32_t seq) override - { - return doFetch(hash, seq, *pCache_, *nCache_, false); - } + store(NodeObjectType type, Blob&& data, uint256 const& hash, std::uint32_t) + override; bool asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& object) override; + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) override; bool storeLedger(std::shared_ptr const& srcLedger) override { - return Database::storeLedger( - *srcLedger, backend_, pCache_, nCache_, nullptr); + return Database::storeLedger(*srcLedger, backend_, pCache_, nCache_); } - int - getDesiredAsyncReadCount(std::uint32_t seq) override + int getDesiredAsyncReadCount(std::uint32_t) override { // We prefer a client not fill our cache // We don't want to push data out of the cache @@ -142,10 +131,10 @@ private: std::shared_ptr backend_; std::shared_ptr - fetchFrom(uint256 const& hash, std::uint32_t seq) override - { - return fetchInternal(hash, backend_); - } + fetchNodeObject( + uint256 const& hash, + std::uint32_t, + FetchReport& fetchReport) override; void for_each(std::function)> f) override diff --git a/src/ripple/nodestore/impl/DatabaseRotatingImp.cpp b/src/ripple/nodestore/impl/DatabaseRotatingImp.cpp index 9b9f966ae..f43daf2e8 100644 --- a/src/ripple/nodestore/impl/DatabaseRotatingImp.cpp +++ b/src/ripple/nodestore/impl/DatabaseRotatingImp.cpp @@ -101,8 +101,7 @@ DatabaseRotatingImp::storeLedger(std::shared_ptr const& srcLedger) return writableBackend_; }(); - return Database::storeLedger( - *srcLedger, backend, pCache_, nCache_, nullptr); + return Database::storeLedger(*srcLedger, backend, pCache_, nCache_); } void @@ -110,7 +109,7 @@ DatabaseRotatingImp::store( NodeObjectType 
type, Blob&& data, uint256 const& hash, - std::uint32_t seq) + std::uint32_t) { auto nObj = NodeObject::createObject(type, std::move(data), hash); pCache_->canonicalize_replace_cache(hash, nObj); @@ -122,22 +121,22 @@ DatabaseRotatingImp::store( backend->store(nObj); nCache_->erase(hash); - storeStats(nObj->getData().size()); + storeStats(1, nObj->getData().size()); } bool DatabaseRotatingImp::asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& object) + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) { // See if the object is in cache - object = pCache_->fetch(hash); - if (object || nCache_->touch_if_exists(hash)) + nodeObject = pCache_->fetch(hash); + if (nodeObject || nCache_->touch_if_exists(hash)) return true; // Otherwise post a read - Database::asyncFetch(hash, seq, pCache_, nCache_); + Database::asyncFetch(hash, ledgerSeq); return false; } @@ -158,33 +157,96 @@ DatabaseRotatingImp::sweep() } std::shared_ptr -DatabaseRotatingImp::fetchFrom(uint256 const& hash, std::uint32_t seq) +DatabaseRotatingImp::fetchNodeObject( + uint256 const& hash, + std::uint32_t, + FetchReport& fetchReport) { - auto [writable, archive] = [&] { - std::lock_guard lock(mutex_); - return std::make_pair(writableBackend_, archiveBackend_); - }(); - - // Try to fetch from the writable backend - auto nObj = fetchInternal(hash, writable); - if (!nObj) - { - // Otherwise try to fetch from the archive backend - nObj = fetchInternal(hash, archive); - if (nObj) + auto fetch = [&](std::shared_ptr const& backend) { + Status status; + std::shared_ptr nodeObject; + try { - { - // Refresh the writable backend pointer - std::lock_guard lock(mutex_); - writable = writableBackend_; - } + status = backend->fetch(hash.data(), &nodeObject); + } + catch (std::exception const& e) + { + JLOG(j_.fatal()) << "Exception, " << e.what(); + Rethrow(); + } - // Update writable backend with data from the archive backend - writable->store(nObj); - nCache_->erase(hash); + switch (status) + 
{ + case ok: + ++fetchHitCount_; + if (nodeObject) + fetchSz_ += nodeObject->getData().size(); + break; + case notFound: + break; + case dataCorrupt: + JLOG(j_.fatal()) << "Corrupt NodeObject #" << hash; + break; + default: + JLOG(j_.warn()) << "Unknown status=" << status; + break; + } + + return nodeObject; + }; + + // See if the node object exists in the cache + auto nodeObject{pCache_->fetch(hash)}; + if (!nodeObject && !nCache_->touch_if_exists(hash)) + { + auto [writable, archive] = [&] { + std::lock_guard lock(mutex_); + return std::make_pair(writableBackend_, archiveBackend_); + }(); + + fetchReport.wentToDisk = true; + + // Try to fetch from the writable backend + nodeObject = fetch(writable); + if (!nodeObject) + { + // Otherwise try to fetch from the archive backend + nodeObject = fetch(archive); + if (nodeObject) + { + { + // Refresh the writable backend pointer + std::lock_guard lock(mutex_); + writable = writableBackend_; + } + + // Update writable backend with data from the archive backend + writable->store(nodeObject); + nCache_->erase(hash); + } + } + + if (!nodeObject) + { + // Just in case a write occurred + nodeObject = pCache_->fetch(hash); + if (!nodeObject) + // We give up + nCache_->insert(hash); + } + else + { + fetchReport.wasFound = true; + + // Ensure all threads get the same object + pCache_->canonicalize_replace_client(hash, nodeObject); + + // Since this was a 'hard' fetch, we will log it + JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db"; } } - return nObj; + + return nodeObject; } void diff --git a/src/ripple/nodestore/impl/DatabaseRotatingImp.h b/src/ripple/nodestore/impl/DatabaseRotatingImp.h index ea6c92567..b49c220d9 100644 --- a/src/ripple/nodestore/impl/DatabaseRotatingImp.h +++ b/src/ripple/nodestore/impl/DatabaseRotatingImp.h @@ -45,8 +45,8 @@ public: ~DatabaseRotatingImp() override { - // Stop threads before data members are destroyed. 
- stopThreads(); + // Stop read threads in base before data members are destroyed + stopReadThreads(); } void @@ -64,29 +64,19 @@ public: import(Database& source) override; void - store( - NodeObjectType type, - Blob&& data, - uint256 const& hash, - std::uint32_t seq) override; - - std::shared_ptr - fetch(uint256 const& hash, std::uint32_t seq) override - { - return doFetch(hash, seq, *pCache_, *nCache_, false); - } + store(NodeObjectType type, Blob&& data, uint256 const& hash, std::uint32_t) + override; bool asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& object) override; + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) override; bool storeLedger(std::shared_ptr const& srcLedger) override; - int - getDesiredAsyncReadCount(std::uint32_t seq) override + int getDesiredAsyncReadCount(std::uint32_t) override { // We prefer a client not fill our cache // We don't want to push data out of the cache @@ -124,7 +114,10 @@ private: mutable std::mutex mutex_; std::shared_ptr - fetchFrom(uint256 const& hash, std::uint32_t seq) override; + fetchNodeObject( + uint256 const& hash, + std::uint32_t, + FetchReport& fetchReport) override; void for_each(std::function)> f) override; diff --git a/src/ripple/nodestore/impl/DatabaseShardImp.cpp b/src/ripple/nodestore/impl/DatabaseShardImp.cpp index ee2137fc6..067423d3a 100644 --- a/src/ripple/nodestore/impl/DatabaseShardImp.cpp +++ b/src/ripple/nodestore/impl/DatabaseShardImp.cpp @@ -57,15 +57,12 @@ DatabaseShardImp::DatabaseShardImp( , parent_(parent) , taskQueue_(std::make_unique(*this)) , earliestShardIndex_(seqToShardIndex(earliestLedgerSeq())) - , avgShardFileSz_(ledgersPerShard_ * kilobytes(192)) + , avgShardFileSz_(ledgersPerShard_ * kilobytes(192ull)) + , openFinalLimit_( + app.config().getValueFor(SizedItem::openFinalLimit, boost::none)) { } -DatabaseShardImp::~DatabaseShardImp() -{ - onStop(); -} - bool DatabaseShardImp::init() { @@ -87,32 +84,22 @@ DatabaseShardImp::init() { using namespace 
boost::filesystem; - // Consolidate the main storage path and all - // historical paths + // Consolidate the main storage path and all historical paths std::vector paths{dir_}; paths.insert( paths.end(), historicalPaths_.begin(), historicalPaths_.end()); for (auto const& path : paths) { - try + if (exists(path)) { - if (exists(path)) + if (!is_directory(path)) { - if (!is_directory(path)) - { - JLOG(j_.error()) << path << " must be a directory"; - return false; - } - } - else if (!create_directories(path)) - { - JLOG(j_.error()) - << "failed to create path: " + path.string(); + JLOG(j_.error()) << path << " must be a directory"; return false; } } - catch (...) + else if (!create_directories(path)) { JLOG(j_.error()) << "failed to create path: " + path.string(); @@ -122,8 +109,7 @@ DatabaseShardImp::init() if (!app_.config().standalone() && !historicalPaths_.empty()) { - // Check historical paths for duplicated - // filesystems + // Check historical paths for duplicated file systems if (!checkHistoricalPaths()) return false; } @@ -132,17 +118,18 @@ DatabaseShardImp::init() ctx_->start(); // Find shards + std::uint32_t openFinals{0}; for (auto const& path : paths) { - for (auto const& d : directory_iterator(path)) + for (auto const& it : directory_iterator(path)) { - if (!is_directory(d)) + // Ignore files + if (!is_directory(it)) continue; - auto const shardDir = d.path(); - - // Check shard directory name is numeric - auto dirName = shardDir.stem().string(); + // Ignore nonnumerical directory names + auto const shardDir{it.path()}; + auto dirName{shardDir.stem().string()}; if (!std::all_of( dirName.begin(), dirName.end(), [](auto c) { return ::isdigit(static_cast(c)); @@ -151,14 +138,15 @@ DatabaseShardImp::init() continue; } + // Ignore values below the earliest shard index auto const shardIndex{std::stoul(dirName)}; if (shardIndex < earliestShardIndex()) { - JLOG(j_.error()) + JLOG(j_.debug()) << "shard " << shardIndex - << " comes before earliest shard index " + 
<< " ignored, comes before earliest shard index " << earliestShardIndex(); - return false; + continue; } // Check if a previous import failed @@ -171,9 +159,9 @@ DatabaseShardImp::init() continue; } - auto shard{std::make_unique( + auto shard{std::make_shared( app_, *this, shardIndex, shardDir.parent_path(), j_)}; - if (!shard->open(scheduler_, *ctx_)) + if (!shard->init(scheduler_, *ctx_)) { // Remove corrupted or legacy shard shard->removeOnDestroy(); @@ -184,44 +172,46 @@ DatabaseShardImp::init() continue; } - if (shard->isFinal()) + switch (shard->getState()) { - shards_.emplace( - shardIndex, - ShardInfo( - std::move(shard), ShardInfo::State::final)); - } - else if (shard->isBackendComplete()) - { - auto const result{shards_.emplace( - shardIndex, - ShardInfo( - std::move(shard), ShardInfo::State::none))}; - finalizeShard( - result.first->second, true, lock, boost::none); - } - else - { - if (acquireIndex_ != 0) - { - JLOG(j_.error()) - << "more than one shard being acquired"; - return false; - } + case Shard::final: + if (++openFinals > openFinalLimit_) + shard->tryClose(); + shards_.emplace(shardIndex, std::move(shard)); + break; - shards_.emplace( - shardIndex, - ShardInfo( - std::move(shard), ShardInfo::State::acquire)); - acquireIndex_ = shardIndex; + case Shard::complete: + finalizeShard( + shards_.emplace(shardIndex, std::move(shard)) + .first->second, + true, + boost::none); + break; + + case Shard::acquire: + if (acquireIndex_ != 0) + { + JLOG(j_.error()) + << "more than one shard being acquired"; + return false; + } + + shards_.emplace(shardIndex, std::move(shard)); + acquireIndex_ = shardIndex; + break; + + default: + JLOG(j_.error()) + << "shard " << shardIndex << " invalid state"; + return false; } } } } catch (std::exception const& e) { - JLOG(j_.error()) - << "exception " << e.what() << " in function " << __func__; + JLOG(j_.fatal()) << "Exception caught in function " << __func__ + << ". 
Error: " << e.what(); return false; } @@ -245,8 +235,10 @@ DatabaseShardImp::prepareLedger(std::uint32_t validLedgerSeq) if (acquireIndex_ != 0) { - if (auto it{shards_.find(acquireIndex_)}; it != shards_.end()) - return it->second.shard->prepare(); + if (auto const it{shards_.find(acquireIndex_)}; it != shards_.end()) + return it->second->prepare(); + + // Should never get here assert(false); return boost::none; } @@ -288,18 +280,16 @@ DatabaseShardImp::prepareLedger(std::uint32_t validLedgerSeq) j_); }(); - if (!shard->open(scheduler_, *ctx_)) + if (!shard->init(scheduler_, *ctx_)) return boost::none; - auto const seq{shard->prepare()}; + auto const ledgerSeq{shard->prepare()}; { std::lock_guard lock(mutex_); - shards_.emplace( - *shardIndex, - ShardInfo(std::move(shard), ShardInfo::State::acquire)); + shards_.emplace(*shardIndex, std::move(shard)); acquireIndex_ = *shardIndex; } - return seq; + return ledgerSeq; } bool @@ -309,11 +299,6 @@ DatabaseShardImp::prepareShard(std::uint32_t shardIndex) JLOG(j.error()) << "shard " << shardIndex << " " << msg; return false; }; - std::lock_guard lock(mutex_); - assert(init_); - - if (!canAdd_) - return fail("cannot be stored at this time"); if (shardIndex < earliestShardIndex()) { @@ -322,43 +307,49 @@ DatabaseShardImp::prepareShard(std::uint32_t shardIndex) std::to_string(earliestShardIndex())); } - // If we are synced to the network, check if the shard index - // is greater or equal to the current shard. - auto seqCheck = [&](std::uint32_t seq) { - // seq will be greater than zero if valid - if (seq >= earliestLedgerSeq() && shardIndex >= seqToShardIndex(seq)) - return fail("has an invalid index"); + // If we are synced to the network, check if the shard index is + // greater or equal to the current or validated shard index. 
+ auto seqCheck = [&](std::uint32_t ledgerSeq) { + if (ledgerSeq >= earliestLedgerSeq() && + shardIndex >= seqToShardIndex(ledgerSeq)) + { + return fail("invalid index"); + } return true; }; if (!seqCheck(app_.getLedgerMaster().getValidLedgerIndex() + 1) || !seqCheck(app_.getLedgerMaster().getCurrentLedgerIndex())) { - return false; + return fail("invalid index"); } - if (shards_.find(shardIndex) != shards_.end()) - { - JLOG(j_.debug()) << "shard " << shardIndex - << " is already stored or queued for import"; - return false; - } + // Any shard earlier than the two most recent shards is a historical shard + auto const isHistoricalShard{shardIndex < shardBoundaryIndex()}; - // Any shard earlier than the two most recent shards - // is a historical shard - bool const isHistoricalShard = shardIndex < shardBoundaryIndex(lock); - auto const numHistShards = numHistoricalShards(lock); + std::lock_guard lock(mutex_); + assert(init_); + + if (!canAdd_) + return fail("cannot be stored at this time"); // Check shard count and available storage space - if (isHistoricalShard && numHistShards >= maxHistoricalShards_) + if (isHistoricalShard && numHistoricalShards(lock) >= maxHistoricalShards_) return fail("maximum number of historical shards reached"); + if (!sufficientStorage( 1, isHistoricalShard ? 
PathDesignation::historical : PathDesignation::none, lock)) + { return fail("insufficient storage space available"); + } + + if (shards_.find(shardIndex) != shards_.end()) + return fail("already stored"); + if (!preparedIndexes_.emplace(shardIndex).second) + return fail("already queued for import"); - shards_.emplace(shardIndex, ShardInfo(nullptr, ShardInfo::State::import)); return true; } @@ -368,11 +359,7 @@ DatabaseShardImp::removePreShard(std::uint32_t shardIndex) std::lock_guard lock(mutex_); assert(init_); - if (auto const it{shards_.find(shardIndex)}; - it != shards_.end() && it->second.state == ShardInfo::State::import) - { - shards_.erase(it); - } + preparedIndexes_.erase(shardIndex); } std::string @@ -383,9 +370,8 @@ DatabaseShardImp::getPreShards() std::lock_guard lock(mutex_); assert(init_); - for (auto const& e : shards_) - if (e.second.state == ShardInfo::State::import) - rs.insert(e.first); + for (auto const& shardIndex : preparedIndexes_) + rs.insert(shardIndex); } if (rs.empty()) @@ -399,31 +385,59 @@ DatabaseShardImp::importShard( std::uint32_t shardIndex, boost::filesystem::path const& srcDir) { + auto fail = [&](std::string const& msg, + std::lock_guard const& lock) { + JLOG(j_.error()) << "shard " << shardIndex << " " << msg; + + // Remove the failed import shard index so it can be retried + preparedIndexes_.erase(shardIndex); + return false; + }; + using namespace boost::filesystem; try { if (!is_directory(srcDir) || is_empty(srcDir)) { - JLOG(j_.error()) << "invalid source directory " << srcDir.string(); - return false; + return fail( + "invalid source directory " + srcDir.string(), + std::lock_guard(mutex_)); } } catch (std::exception const& e) { - JLOG(j_.error()) << "exception " << e.what() << " in function " - << __func__; - return false; + return fail( + std::string(". Exception caught in function ") + __func__ + + ". 
Error: " + e.what(), + std::lock_guard(mutex_)); } - auto expectedHash = app_.getLedgerMaster().walkHashBySeq( - lastLedgerSeq(shardIndex), InboundLedger::Reason::GENERIC); - + auto const expectedHash{app_.getLedgerMaster().walkHashBySeq( + lastLedgerSeq(shardIndex), InboundLedger::Reason::GENERIC)}; if (!expectedHash) + return fail("expected hash not found", std::lock_guard(mutex_)); + + path dstDir; { - JLOG(j_.error()) << "shard " << shardIndex - << " expected hash not found"; - return false; + std::lock_guard lock(mutex_); + if (shards_.find(shardIndex) != shards_.end()) + return fail("already exists", lock); + + // Check shard was prepared for import + if (preparedIndexes_.find(shardIndex) == preparedIndexes_.end()) + return fail("was not prepared for import", lock); + + auto const pathDesignation{ + prepareForNewShard(shardIndex, numHistoricalShards(lock), lock)}; + if (!pathDesignation) + return fail("failed to import", lock); + + if (*pathDesignation == PathDesignation::historical) + dstDir = chooseHistoricalPath(lock); + else + dstDir = dir_; } + dstDir /= std::to_string(shardIndex); auto renameDir = [&](path const& src, path const& dst) { try @@ -432,42 +446,14 @@ DatabaseShardImp::importShard( } catch (std::exception const& e) { - JLOG(j_.error()) - << "exception " << e.what() << " in function " << __func__; - return false; + return fail( + std::string(". Exception caught in function ") + __func__ + + ". 
Error: " + e.what(), + std::lock_guard(mutex_)); } return true; }; - path dstDir; - { - std::lock_guard lock(mutex_); - assert(init_); - - // Check shard is prepared - if (auto const it{shards_.find(shardIndex)}; it == shards_.end() || - it->second.shard || it->second.state != ShardInfo::State::import) - { - JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; - return false; - } - - auto const pathDesignation = - prepareForNewShard(shardIndex, numHistoricalShards(lock), lock); - - if (!pathDesignation) - { - JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; - return false; - } - - auto const needsHistoricalPath = - *pathDesignation == PathDesignation::historical; - dstDir = needsHistoricalPath ? chooseHistoricalPath(lock) : dir_; - } - - dstDir /= std::to_string(shardIndex); - // Rename source directory to the shard database directory if (!renameDir(srcDir, dstDir)) return false; @@ -476,92 +462,84 @@ DatabaseShardImp::importShard( auto shard{std::make_unique( app_, *this, shardIndex, dstDir.parent_path(), j_)}; - if (!shard->open(scheduler_, *ctx_) || !shard->isBackendComplete()) + if (!shard->init(scheduler_, *ctx_) || shard->getState() != Shard::complete) { - JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; shard.reset(); renameDir(dstDir, srcDir); - return false; + return fail("failed to import", std::lock_guard(mutex_)); } - { + auto const [it, inserted] = [&]() { std::lock_guard lock(mutex_); - auto const it{shards_.find(shardIndex)}; - if (it == shards_.end() || it->second.shard || - it->second.state != ShardInfo::State::import) - { - JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; - shard.reset(); - renameDir(dstDir, srcDir); - return false; - } + preparedIndexes_.erase(shardIndex); + return shards_.emplace(shardIndex, std::move(shard)); + }(); - it->second.shard = std::move(shard); - finalizeShard(it->second, true, lock, expectedHash); + if (!inserted) + { + shard.reset(); + 
renameDir(dstDir, srcDir); + return fail("failed to import", std::lock_guard(mutex_)); } + finalizeShard(it->second, true, expectedHash); return true; } std::shared_ptr -DatabaseShardImp::fetchLedger(uint256 const& hash, std::uint32_t seq) +DatabaseShardImp::fetchLedger(uint256 const& hash, std::uint32_t ledgerSeq) { - auto const shardIndex{seqToShardIndex(seq)}; + auto const shardIndex{seqToShardIndex(ledgerSeq)}; { std::shared_ptr shard; - ShardInfo::State state; { std::lock_guard lock(mutex_); assert(init_); - if (auto const it{shards_.find(shardIndex)}; it != shards_.end()) - { - shard = it->second.shard; - state = it->second.state; - } - else - return {}; + auto const it{shards_.find(shardIndex)}; + if (it == shards_.end()) + return nullptr; + shard = it->second; } - // Check if the ledger is stored in a final shard - // or in the shard being acquired - switch (state) + // Ledger must be stored in a final or acquiring shard + switch (shard->getState()) { - case ShardInfo::State::final: + case Shard::final: break; - case ShardInfo::State::acquire: - if (shard->containsLedger(seq)) + case Shard::acquire: + if (shard->containsLedger(ledgerSeq)) break; [[fallthrough]]; default: - return {}; + return nullptr; } } - auto nObj{fetch(hash, seq)}; - if (!nObj) - return {}; + auto const nodeObject{Database::fetchNodeObject(hash, ledgerSeq)}; + if (!nodeObject) + return nullptr; - auto fail = [this, seq](std::string const& msg) -> std::shared_ptr { - JLOG(j_.error()) << "shard " << seqToShardIndex(seq) << " " << msg; - return {}; + auto fail = [&](std::string const& msg) -> std::shared_ptr { + JLOG(j_.error()) << "shard " << shardIndex << " " << msg; + return nullptr; }; auto ledger{std::make_shared( - deserializePrefixedHeader(makeSlice(nObj->getData())), + deserializePrefixedHeader(makeSlice(nodeObject->getData())), app_.config(), *app_.getShardFamily())}; - if (ledger->info().seq != seq) + if (ledger->info().seq != ledgerSeq) { return fail( - "encountered invalid 
ledger sequence " + std::to_string(seq)); + "encountered invalid ledger sequence " + std::to_string(ledgerSeq)); } if (ledger->info().hash != hash) { return fail( "encountered invalid ledger hash " + to_string(hash) + - " on sequence " + std::to_string(seq)); + " on sequence " + std::to_string(ledgerSeq)); } ledger->setFull(); @@ -570,7 +548,7 @@ DatabaseShardImp::fetchLedger(uint256 const& hash, std::uint32_t seq) { return fail( "is missing root STATE node on hash " + to_string(hash) + - " on sequence " + std::to_string(seq)); + " on sequence " + std::to_string(ledgerSeq)); } if (ledger->info().txHash.isNonZero()) @@ -580,7 +558,7 @@ DatabaseShardImp::fetchLedger(uint256 const& hash, std::uint32_t seq) { return fail( "is missing root TXN node on hash " + to_string(hash) + - " on sequence " + std::to_string(seq)); + " on sequence " + std::to_string(ledgerSeq)); } } return ledger; @@ -589,33 +567,34 @@ DatabaseShardImp::fetchLedger(uint256 const& hash, std::uint32_t seq) void DatabaseShardImp::setStored(std::shared_ptr const& ledger) { + auto const ledgerSeq{ledger->info().seq}; if (ledger->info().hash.isZero()) { JLOG(j_.error()) << "zero ledger hash for ledger sequence " - << ledger->info().seq; + << ledgerSeq; return; } if (ledger->info().accountHash.isZero()) { JLOG(j_.error()) << "zero account hash for ledger sequence " - << ledger->info().seq; + << ledgerSeq; return; } if (ledger->stateMap().getHash().isNonZero() && !ledger->stateMap().isValid()) { JLOG(j_.error()) << "invalid state map for ledger sequence " - << ledger->info().seq; + << ledgerSeq; return; } if (ledger->info().txHash.isNonZero() && !ledger->txMap().isValid()) { JLOG(j_.error()) << "invalid transaction map for ledger sequence " - << ledger->info().seq; + << ledgerSeq; return; } - auto const shardIndex{seqToShardIndex(ledger->info().seq)}; + auto const shardIndex{seqToShardIndex(ledgerSeq)}; std::shared_ptr shard; { std::lock_guard lock(mutex_); @@ -628,17 +607,23 @@ 
DatabaseShardImp::setStored(std::shared_ptr const& ledger) return; } - if (auto const it{shards_.find(shardIndex)}; it != shards_.end()) - shard = it->second.shard; - else + auto const it{shards_.find(shardIndex)}; + if (it == shards_.end()) { JLOG(j_.error()) << "shard " << shardIndex << " is not being acquired"; return; } + shard = it->second; } - storeLedgerInShard(shard, ledger); + if (shard->containsLedger(ledgerSeq)) + { + JLOG(j_.trace()) << "shard " << shardIndex << " ledger already stored"; + return; + } + + setStoredInShard(shard, ledger); } std::string @@ -651,50 +636,50 @@ DatabaseShardImp::getCompleteShards() } void -DatabaseShardImp::validate() +DatabaseShardImp::onStop() +{ + // Stop read threads in base before data members are destroyed + stopReadThreads(); + + std::lock_guard lock(mutex_); + + // Notify shards to stop + for (auto const& e : shards_) + e.second->stop(); +} + +void +DatabaseShardImp::onChildrenStopped() { std::vector> shards; { std::lock_guard lock(mutex_); - assert(init_); - // Only shards with a state of final should be validated - for (auto& e : shards_) - if (e.second.state == ShardInfo::State::final) - shards.push_back(e.second.shard); - - if (shards.empty()) - return; - - JLOG(j_.debug()) << "Validating shards " << status_; + shards.reserve(shards_.size()); + for (auto const& e : shards_) + shards.push_back(e.second); + shards_.clear(); } + // All shards should be expired at this point for (auto const& e : shards) { - if (auto shard{e.lock()}; shard) - shard->finalize(true, boost::none); + if (!e.expired()) + { + std::string shardIndex; + if (auto const shard{e.lock()}; shard) + shardIndex = std::to_string(shard->index()); + + JLOG(j_.warn()) << " shard " << shardIndex << " unexpired"; + } } - app_.getShardFamily()->reset(); -} - -void -DatabaseShardImp::onStop() -{ - // Stop read threads in base before data members are destroyed - stopThreads(); - - std::lock_guard lock(mutex_); - if (shards_.empty()) - return; - - // Notify 
and destroy shards - for (auto& e : shards_) + if (areChildrenStopped()) + stopped(); + else { - if (e.second.shard) - e.second.shard->stop(); + JLOG(j_.warn()) << " Children failed to stop"; } - shards_.clear(); } void @@ -718,41 +703,41 @@ DatabaseShardImp::import(Database& source) auto loadLedger = [&](bool ascendSort = true) -> boost::optional { std::shared_ptr ledger; - std::uint32_t seq; - std::tie(ledger, seq, std::ignore) = loadLedgerHelper( + std::uint32_t ledgerSeq; + std::tie(ledger, ledgerSeq, std::ignore) = loadLedgerHelper( "WHERE LedgerSeq >= " + std::to_string(earliestLedgerSeq()) + " order by LedgerSeq " + (ascendSort ? "asc" : "desc") + " limit 1", app_, false); - if (!ledger || seq == 0) + if (!ledger || ledgerSeq == 0) { JLOG(j_.error()) << "no suitable ledgers were found in" " the SQLite database to import"; return boost::none; } - return seq; + return ledgerSeq; }; // Find earliest ledger sequence stored - auto seq{loadLedger()}; - if (!seq) + auto ledgerSeq{loadLedger()}; + if (!ledgerSeq) return; - earliestIndex = seqToShardIndex(*seq); + earliestIndex = seqToShardIndex(*ledgerSeq); // Consider only complete shards - if (seq != firstLedgerSeq(earliestIndex)) + if (ledgerSeq != firstLedgerSeq(earliestIndex)) ++earliestIndex; // Find last ledger sequence stored - seq = loadLedger(false); - if (!seq) + ledgerSeq = loadLedger(false); + if (!ledgerSeq) return; - latestIndex = seqToShardIndex(*seq); + latestIndex = seqToShardIndex(*ledgerSeq); // Consider only complete shards - if (seq != lastLedgerSeq(latestIndex)) + if (ledgerSeq != lastLedgerSeq(latestIndex)) --latestIndex; if (latestIndex < earliestIndex) @@ -779,11 +764,26 @@ DatabaseShardImp::import(Database& source) auto const needsHistoricalPath = *pathDesignation == PathDesignation::historical; - // Skip if already stored - if (shardIndex == acquireIndex_ || - shards_.find(shardIndex) != shards_.end()) + // Skip if being acquired + if (shardIndex == acquireIndex_) { - JLOG(j_.debug()) << 
"shard " << shardIndex << " already exists"; + JLOG(j_.debug()) + << "shard " << shardIndex << " already being acquired"; + continue; + } + + // Skip if being imported + if (preparedIndexes_.find(shardIndex) != preparedIndexes_.end()) + { + JLOG(j_.debug()) + << "shard " << shardIndex << " already being imported"; + continue; + } + + // Skip if stored + if (shards_.find(shardIndex) != shards_.end()) + { + JLOG(j_.debug()) << "shard " << shardIndex << " already stored"; continue; } @@ -802,7 +802,7 @@ DatabaseShardImp::import(Database& source) bool valid{true}; for (std::uint32_t n = firstSeq; n <= lastSeq; n += 256) { - if (!source.fetch(ledgerHashes[n].first, n)) + if (!source.fetchNodeObject(ledgerHashes[n].first, n)) { JLOG(j_.warn()) << "SQLite ledger sequence " << n << " mismatches node store"; @@ -818,10 +818,9 @@ DatabaseShardImp::import(Database& source) needsHistoricalPath ? chooseHistoricalPath(lock) : dir_; // Create the new shard - auto shard = - std::make_unique(app_, *this, shardIndex, path, j_); - - if (!shard->open(scheduler_, *ctx_)) + auto shard{ + std::make_unique(app_, *this, shardIndex, path, j_)}; + if (!shard->init(scheduler_, *ctx_)) continue; // Create a marker file to signify an import in progress @@ -843,33 +842,29 @@ DatabaseShardImp::import(Database& source) std::shared_ptr recentStored; boost::optional lastLedgerHash; - while (auto seq = shard->prepare()) + while (auto const ledgerSeq = shard->prepare()) { - auto ledger{loadByIndex(*seq, app_, false)}; - if (!ledger || ledger->info().seq != seq) + auto ledger{loadByIndex(*ledgerSeq, app_, false)}; + if (!ledger || ledger->info().seq != ledgerSeq) break; - if (!Database::storeLedger( - *ledger, - shard->getBackend(), - nullptr, - nullptr, - recentStored)) - { - break; - } - - if (!shard->store(ledger)) + auto const result{shard->storeLedger(ledger, recentStored)}; + storeStats(result.count, result.size); + if (result.error) break; - if (!lastLedgerHash && seq == 
lastLedgerSeq(shardIndex)) + if (!shard->setLedgerStored(ledger)) + break; + + if (!lastLedgerHash && ledgerSeq == lastLedgerSeq(shardIndex)) lastLedgerHash = ledger->info().hash; - recentStored = ledger; + recentStored = std::move(ledger); } using namespace boost::filesystem; - if (lastLedgerHash && shard->isBackendComplete()) + bool success{false}; + if (lastLedgerHash && shard->getState() == Shard::complete) { // Store shard final key Serializer s; @@ -877,37 +872,36 @@ DatabaseShardImp::import(Database& source) s.add32(firstLedgerSeq(shardIndex)); s.add32(lastLedgerSeq(shardIndex)); s.addBitString(*lastLedgerHash); - auto nObj{NodeObject::createObject( + auto const nodeObject{NodeObject::createObject( hotUNKNOWN, std::move(s.modData()), Shard::finalKey)}; - try + if (shard->storeNodeObject(nodeObject)) { - shard->getBackend()->store(nObj); + try + { + // The import process is complete and the + // marker file is no longer required + remove_all(markerFile); - // The import process is complete and the - // marker file is no longer required - remove_all(markerFile); - - JLOG(j_.debug()) << "shard " << shardIndex - << " was successfully imported"; - - auto const result{shards_.emplace( - shardIndex, - ShardInfo(std::move(shard), ShardInfo::State::none))}; - finalizeShard( - result.first->second, true, lock, boost::none); - - if (shardIndex < shardBoundaryIndex(lock)) - ++numHistShards; - } - catch (std::exception const& e) - { - JLOG(j_.error()) << "exception " << e.what() - << " in function " << __func__; - shard->removeOnDestroy(); + JLOG(j_.debug()) << "shard " << shardIndex + << " was successfully imported"; + finalizeShard( + shards_.emplace(shardIndex, std::move(shard)) + .first->second, + true, + boost::none); + success = true; + } + catch (std::exception const& e) + { + JLOG(j_.fatal()) << "shard index " << shardIndex + << ". Exception caught in function " + << __func__ << ". 
Error: " << e.what(); + } } } - else + + if (!success) { JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; @@ -929,13 +923,13 @@ DatabaseShardImp::getWriteLoad() const std::lock_guard lock(mutex_); assert(init_); - if (auto const it{shards_.find(acquireIndex_)}; it != shards_.end()) - shard = it->second.shard; - else + auto const it{shards_.find(acquireIndex_)}; + if (it == shards_.end()) return 0; + shard = it->second; } - return shard->getBackend()->getWriteLoad(); + return shard->getWriteLoad(); } void @@ -943,14 +937,12 @@ DatabaseShardImp::store( NodeObjectType type, Blob&& data, uint256 const& hash, - std::uint32_t seq) + std::uint32_t ledgerSeq) { - auto const shardIndex{seqToShardIndex(seq)}; + auto const shardIndex{seqToShardIndex(ledgerSeq)}; std::shared_ptr shard; { std::lock_guard lock(mutex_); - assert(init_); - if (shardIndex != acquireIndex_) { JLOG(j_.trace()) @@ -958,59 +950,52 @@ DatabaseShardImp::store( return; } - if (auto const it{shards_.find(shardIndex)}; it != shards_.end()) - shard = it->second.shard; - else + auto const it{shards_.find(shardIndex)}; + if (it == shards_.end()) { JLOG(j_.error()) << "shard " << shardIndex << " is not being acquired"; return; } + shard = it->second; } - auto [backend, pCache, nCache] = shard->getBackendAll(); - auto nObj{NodeObject::createObject(type, std::move(data), hash)}; - - pCache->canonicalize_replace_cache(hash, nObj); - backend->store(nObj); - nCache->erase(hash); - - storeStats(nObj->getData().size()); -} - -std::shared_ptr -DatabaseShardImp::fetch(uint256 const& hash, std::uint32_t seq) -{ - auto cache{getCache(seq)}; - if (cache.first) - return doFetch(hash, seq, *cache.first, *cache.second, false); - return {}; + auto const nodeObject{ + NodeObject::createObject(type, std::move(data), hash)}; + if (shard->storeNodeObject(nodeObject)) + storeStats(1, nodeObject->getData().size()); } bool DatabaseShardImp::asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& 
object) + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) { - auto cache{getCache(seq)}; - if (cache.first) + std::shared_ptr shard; { - // See if the object is in cache - object = cache.first->fetch(hash); - if (object || cache.second->touch_if_exists(hash)) - return true; - // Otherwise post a read - Database::asyncFetch(hash, seq, cache.first, cache.second); + std::lock_guard lock(mutex_); + assert(init_); + + auto const it{shards_.find(acquireIndex_)}; + if (it == shards_.end()) + return false; + shard = it->second; } + + if (shard->fetchNodeObjectFromCache(hash, nodeObject)) + return true; + + // Otherwise post a read + Database::asyncFetch(hash, ledgerSeq); return false; } bool DatabaseShardImp::storeLedger(std::shared_ptr const& srcLedger) { - auto const seq{srcLedger->info().seq}; - auto const shardIndex{seqToShardIndex(seq)}; + auto const ledgerSeq{srcLedger->info().seq}; + auto const shardIndex{seqToShardIndex(ledgerSeq)}; std::shared_ptr shard; { std::lock_guard lock(mutex_); @@ -1023,54 +1008,40 @@ DatabaseShardImp::storeLedger(std::shared_ptr const& srcLedger) return false; } - if (auto const it{shards_.find(shardIndex)}; it != shards_.end()) - shard = it->second.shard; - else + auto const it{shards_.find(shardIndex)}; + if (it == shards_.end()) { JLOG(j_.error()) << "shard " << shardIndex << " is not being acquired"; return false; } + shard = it->second; } - if (shard->containsLedger(seq)) - { - JLOG(j_.trace()) << "shard " << shardIndex << " ledger already stored"; + auto const result{shard->storeLedger(srcLedger, nullptr)}; + storeStats(result.count, result.size); + if (result.error || result.count == 0 || result.size == 0) return false; - } - { - auto [backend, pCache, nCache] = shard->getBackendAll(); - if (!Database::storeLedger( - *srcLedger, backend, pCache, nCache, nullptr)) - { - return false; - } - } - - return storeLedgerInShard(shard, srcLedger); + return setStoredInShard(shard, srcLedger); } int 
-DatabaseShardImp::getDesiredAsyncReadCount(std::uint32_t seq) +DatabaseShardImp::getDesiredAsyncReadCount(std::uint32_t ledgerSeq) { - auto const shardIndex{seqToShardIndex(seq)}; + auto const shardIndex{seqToShardIndex(ledgerSeq)}; std::shared_ptr shard; { std::lock_guard lock(mutex_); assert(init_); - if (auto const it{shards_.find(shardIndex)}; it != shards_.end() && - (it->second.state == ShardInfo::State::final || - it->second.state == ShardInfo::State::acquire)) - { - shard = it->second.shard; - } - else + auto const it{shards_.find(shardIndex)}; + if (it == shards_.end()) return 0; + shard = it->second; } - return shard->pCache()->getTargetSize() / asyncDivider; + return shard->getDesiredAsyncReadCount(); } float @@ -1081,13 +1052,13 @@ DatabaseShardImp::getCacheHitRate() std::lock_guard lock(mutex_); assert(init_); - if (auto const it{shards_.find(acquireIndex_)}; it != shards_.end()) - shard = it->second.shard; - else + auto const it{shards_.find(acquireIndex_)}; + if (it == shards_.end()) return 0; + shard = it->second; } - return shard->pCache()->getHitRate(); + return shard->getCacheHitRate(); } void @@ -1098,23 +1069,54 @@ DatabaseShardImp::sweep() std::lock_guard lock(mutex_); assert(init_); + shards.reserve(shards_.size()); for (auto const& e : shards_) - if (e.second.state == ShardInfo::State::final || - e.second.state == ShardInfo::State::acquire) - { - shards.push_back(e.second.shard); - } + shards.push_back(e.second); } + std::vector> openFinals; + openFinals.reserve(openFinalLimit_); + for (auto const& e : shards) { - if (auto shard{e.lock()}; shard) + if (auto const shard{e.lock()}; shard && shard->isOpen()) + { shard->sweep(); + + if (shard->getState() == Shard::final) + openFinals.emplace_back(std::move(shard)); + } + } + + if (openFinals.size() > openFinalLimit_) + { + JLOG(j_.trace()) << "Open shards exceed configured limit of " + << openFinalLimit_ << " by " + << (openFinals.size() - openFinalLimit_); + + // Try to close enough shards to 
be within the limit. + // Sort ascending on last use so the oldest are removed first. + std::sort( + openFinals.begin(), + openFinals.end(), + [&](std::shared_ptr const& lhsShard, + std::shared_ptr const& rhsShard) { + return lhsShard->getLastUse() < rhsShard->getLastUse(); + }); + + for (auto it{openFinals.cbegin()}; + it != openFinals.cend() && openFinals.size() > openFinalLimit_;) + { + if ((*it)->tryClose()) + it = openFinals.erase(it); + else + ++it; + } } } bool -DatabaseShardImp::initConfig(std::lock_guard&) +DatabaseShardImp::initConfig(std::lock_guard const&) { auto fail = [j = j_](std::string const& msg) { JLOG(j.error()) << "[" << ConfigSection::shardDatabase() << "] " << msg; @@ -1202,30 +1204,28 @@ DatabaseShardImp::initConfig(std::lock_guard&) } std::shared_ptr -DatabaseShardImp::fetchFrom(uint256 const& hash, std::uint32_t seq) +DatabaseShardImp::fetchNodeObject( + uint256 const& hash, + std::uint32_t ledgerSeq, + FetchReport& fetchReport) { - auto const shardIndex{seqToShardIndex(seq)}; + auto const shardIndex{seqToShardIndex(ledgerSeq)}; std::shared_ptr shard; { std::lock_guard lock(mutex_); - assert(init_); - - if (auto const it{shards_.find(shardIndex)}; - it != shards_.end() && it->second.shard) - { - shard = it->second.shard; - } - else - return {}; + auto const it{shards_.find(shardIndex)}; + if (it == shards_.end()) + return nullptr; + shard = it->second; } - return fetchInternal(hash, shard->getBackend()); + return shard->fetchNodeObject(hash, fetchReport); } boost::optional DatabaseShardImp::findAcquireIndex( std::uint32_t validLedgerSeq, - std::lock_guard&) + std::lock_guard const&) { if (validLedgerSeq < earliestLedgerSeq()) return boost::none; @@ -1254,8 +1254,11 @@ DatabaseShardImp::findAcquireIndex( shardIndex <= maxShardIndex; ++shardIndex) { - if (shards_.find(shardIndex) == shards_.end()) + if (shards_.find(shardIndex) == shards_.end() && + preparedIndexes_.find(shardIndex) == preparedIndexes_.end()) + { 
available.push_back(shardIndex); + } } if (available.empty()) @@ -1274,8 +1277,11 @@ DatabaseShardImp::findAcquireIndex( for (int i = 0; i < 40; ++i) { auto const shardIndex{rand_int(earliestShardIndex(), maxShardIndex)}; - if (shards_.find(shardIndex) == shards_.end()) + if (shards_.find(shardIndex) == shards_.end() && + preparedIndexes_.find(shardIndex) == preparedIndexes_.end()) + { return shardIndex; + } } assert(false); @@ -1284,35 +1290,22 @@ DatabaseShardImp::findAcquireIndex( void DatabaseShardImp::finalizeShard( - ShardInfo& shardInfo, + std::shared_ptr& shard, bool writeSQLite, - std::lock_guard&, boost::optional const& expectedHash) { - assert(shardInfo.shard); - assert(shardInfo.shard->index() != acquireIndex_); - assert(shardInfo.shard->isBackendComplete()); - assert(shardInfo.state != ShardInfo::State::finalize); - - auto const shardIndex{shardInfo.shard->index()}; - - shardInfo.state = ShardInfo::State::finalize; - taskQueue_->addTask([this, shardIndex, writeSQLite, expectedHash]() { + taskQueue_->addTask([this, + wptr = std::weak_ptr(shard), + writeSQLite, + expectedHash]() { if (isStopping()) return; - std::shared_ptr shard; + auto shard{wptr.lock()}; + if (!shard) { - std::lock_guard lock(mutex_); - if (auto const it{shards_.find(shardIndex)}; it != shards_.end()) - { - shard = it->second.shard; - } - else - { - JLOG(j_.error()) << "Unable to finalize shard " << shardIndex; - return; - } + JLOG(j_.debug()) << "Shard removed before being finalized"; + return; } if (!shard->finalize(writeSQLite, expectedHash)) @@ -1329,42 +1322,35 @@ DatabaseShardImp::finalizeShard( return; { + auto const boundaryIndex{shardBoundaryIndex()}; + std::lock_guard lock(mutex_); - auto const it{shards_.find(shardIndex)}; - if (it == shards_.end()) - return; - it->second.state = ShardInfo::State::final; updateStatus(lock); - auto const boundaryIndex = shardBoundaryIndex(lock); - auto const isHistoricalShard = shardIndex < boundaryIndex; - - if (isHistoricalShard) + if 
(shard->index() < boundaryIndex) { + // This is a historical shard if (!historicalPaths_.empty() && shard->getDir().parent_path() == dir_) { - // This is a historical shard that wasn't - // placed at a separate historical path - JLOG(j_.warn()) << "shard " << shardIndex + // Shard wasn't placed at a separate historical path + JLOG(j_.warn()) << "shard " << shard->index() << " is not stored at a historical path"; } } else { - // Not a historical shard. Shift recent shards - // if necessary + // Not a historical shard. Shift recent shards if necessary relocateOutdatedShards(lock); - assert(!boundaryIndex || shardIndex - boundaryIndex <= 1); + assert(!boundaryIndex || shard->index() - boundaryIndex <= 1); - auto& recentShard = shardIndex == boundaryIndex + auto& recentShard = shard->index() == boundaryIndex ? secondLatestShardIndex_ : latestShardIndex_; - // Set the appropriate recent shard - // index - recentShard = shardIndex; + // Set the appropriate recent shard index + recentShard = shard->index(); if (shard->getDir().parent_path() != dir_) { @@ -1383,7 +1369,7 @@ DatabaseShardImp::finalizeShard( protocol::TMPeerShardInfo message; PublicKey const& publicKey{app_.nodeIdentity().first}; message.set_nodepubkey(publicKey.data(), publicKey.size()); - message.set_shardindexes(std::to_string(shardIndex)); + message.set_shardindexes(std::to_string(shard->index())); app_.overlay().foreach(send_always(std::make_shared( message, protocol::mtPEER_SHARD_INFO))); } @@ -1396,14 +1382,12 @@ DatabaseShardImp::setFileStats() std::vector> shards; { std::lock_guard lock(mutex_); - assert(init_); - if (shards_.empty()) return; + shards.reserve(shards_.size()); for (auto const& e : shards_) - if (e.second.shard) - shards.push_back(e.second.shard); + shards.push_back(e.second); } std::uint64_t sumSz{0}; @@ -1411,9 +1395,9 @@ DatabaseShardImp::setFileStats() std::uint32_t numShards{0}; for (auto const& e : shards) { - if (auto shard{e.lock()}; shard) + if (auto const shard{e.lock()}; 
shard) { - auto [sz, fd] = shard->fileInfo(); + auto const [sz, fd] = shard->getFileInfo(); sumSz += sz; sumFd += fd; ++numShards; @@ -1442,45 +1426,20 @@ DatabaseShardImp::setFileStats() } void -DatabaseShardImp::updateStatus(std::lock_guard&) +DatabaseShardImp::updateStatus(std::lock_guard const&) { if (!shards_.empty()) { RangeSet rs; for (auto const& e : shards_) - if (e.second.state == ShardInfo::State::final) - rs.insert(e.second.shard->index()); + if (e.second->getState() == Shard::final) + rs.insert(e.second->index()); status_ = to_string(rs); } else status_.clear(); } -std::pair, std::shared_ptr> -DatabaseShardImp::getCache(std::uint32_t seq) -{ - auto const shardIndex{seqToShardIndex(seq)}; - std::shared_ptr shard; - { - std::lock_guard lock(mutex_); - assert(init_); - - if (auto const it{shards_.find(shardIndex)}; - it != shards_.end() && it->second.shard) - { - shard = it->second.shard; - } - else - return {}; - } - - std::shared_ptr pCache; - std::shared_ptr nCache; - std::tie(std::ignore, pCache, nCache) = shard->getBackendAll(); - - return std::make_pair(pCache, nCache); -} - bool DatabaseShardImp::sufficientStorage( std::uint32_t numShards, @@ -1528,8 +1487,8 @@ DatabaseShardImp::sufficientStorage( } catch (std::exception const& e) { - JLOG(j_.error()) << "exception " << e.what() << " in function " - << __func__; + JLOG(j_.fatal()) << "Exception caught in function " << __func__ + << ". 
Error: " << e.what(); return false; } @@ -1537,29 +1496,26 @@ DatabaseShardImp::sufficientStorage( } bool -DatabaseShardImp::storeLedgerInShard( +DatabaseShardImp::setStoredInShard( std::shared_ptr& shard, std::shared_ptr const& ledger) { - bool result{true}; - - if (!shard->store(ledger)) + if (!shard->setLedgerStored(ledger)) { // Invalid or corrupt shard, remove it removeFailedShard(shard); - result = false; + return false; } - else if (shard->isBackendComplete()) + + if (shard->getState() == Shard::complete) { std::lock_guard lock(mutex_); - if (auto const it{shards_.find(shard->index())}; it != shards_.end()) { if (shard->index() == acquireIndex_) acquireIndex_ = 0; - if (it->second.state != ShardInfo::State::finalize) - finalizeShard(it->second, false, lock, boost::none); + finalizeShard(it->second, false, boost::none); } else { @@ -1569,7 +1525,7 @@ DatabaseShardImp::storeLedgerInShard( } setFileStats(); - return result; + return true; } void @@ -1587,8 +1543,11 @@ DatabaseShardImp::removeFailedShard(std::shared_ptr& shard) if (shard->index() == secondLatestShardIndex_) secondLatestShardIndex_ = boost::none; - if ((shards_.erase(shard->index()) > 0) && shard->isFinal()) + if ((shards_.erase(shard->index()) > 0) && + shard->getState() == Shard::final) + { updateStatus(lock); + } } shard->removeOnDestroy(); @@ -1600,29 +1559,26 @@ DatabaseShardImp::removeFailedShard(std::shared_ptr& shard) } std::uint32_t -DatabaseShardImp::shardBoundaryIndex(std::lock_guard const&) const +DatabaseShardImp::shardBoundaryIndex() const { - auto const validIndex = app_.getLedgerMaster().getValidLedgerIndex(); - - // Shards with an index earlier than recentShardBoundaryIndex + // Shards with an index earlier than the recent shard boundary index // are considered historical. The three shards at or later than // this index consist of the two most recently validated shards // and the shard still in the process of being built by live // transactions. 
- return NodeStore::seqToShardIndex(validIndex, ledgersPerShard_) - 1; + return NodeStore::seqToShardIndex( + app_.getLedgerMaster().getValidLedgerIndex(), ledgersPerShard_) - + 1; } std::uint32_t DatabaseShardImp::numHistoricalShards( std::lock_guard const& lock) const { - auto const recentShardBoundaryIndex = shardBoundaryIndex(lock); - + auto const boundaryIndex{shardBoundaryIndex()}; return std::count_if( - shards_.begin(), - shards_.end(), - [recentShardBoundaryIndex](auto const& entry) { - return entry.first < recentShardBoundaryIndex; + shards_.begin(), shards_.end(), [boundaryIndex](auto const& entry) { + return entry.first < boundaryIndex; }); } @@ -1633,8 +1589,8 @@ DatabaseShardImp::relocateOutdatedShards( if (auto& cur = latestShardIndex_, &prev = secondLatestShardIndex_; cur || prev) { - auto const latestShardIndex = NodeStore::seqToShardIndex( - app_.getLedgerMaster().getValidLedgerIndex(), ledgersPerShard_); + auto const latestShardIndex = + seqToShardIndex(app_.getLedgerMaster().getValidLedgerIndex()); auto const separateHistoricalPath = !historicalPaths_.empty(); @@ -1644,8 +1600,8 @@ DatabaseShardImp::relocateOutdatedShards( if (auto it = shards_.find(shardIndex); it != shards_.end()) { - if (it->second.shard) - removeFailedShard(it->second.shard); + if (it->second) + removeFailedShard(it->second); else { JLOG(j_.warn()) << "can't find shard to remove"; @@ -1688,49 +1644,43 @@ DatabaseShardImp::relocateOutdatedShards( if (auto it = shards_.find(shardIndex); it != shards_.end()) { - if (auto& shard = it->second.shard) - { - // Close any open file descriptors before moving - // the shard dir. Don't call removeOnDestroy since - // that would attempt to close the fds after the - // directory has been moved. - shard->closeAll(); + auto& shard{it->second}; - try - { - // Move the shard directory to the new path - boost::filesystem::rename( - shard->getDir().string(), - dst / std::to_string(shardIndex)); - } - catch (...) 
- { - JLOG(j_.error()) - << "shard " << shardIndex - << " failed to move to historical storage"; - - return; - } - - // Create a shard instance at the new location - shard = std::make_unique( - app_, *this, shardIndex, dst, j_); - - // Open the new shard - if (!shard->open(scheduler_, *ctx_)) - { - JLOG(j_.error()) - << "shard " << shardIndex - << " failed to open in historical storage"; - - shard->removeOnDestroy(); - shard.reset(); - } - } - else + // Close any open file descriptors before moving the shard + // directory. Don't call removeOnDestroy since that would + // attempt to close the fds after the directory has been moved. + if (!shard->tryClose()) { JLOG(j_.warn()) - << "can't find shard to move to historical path"; + << "can't close shard to move to historical path"; + return; + } + + try + { + // Move the shard directory to the new path + boost::filesystem::rename( + shard->getDir().string(), + dst / std::to_string(shardIndex)); + } + catch (...) + { + JLOG(j_.error()) << "shard " << shardIndex + << " failed to move to historical storage"; + return; + } + + // Create a shard instance at the new location + shard = + std::make_shared(app_, *this, shardIndex, dst, j_); + + // Open the new shard + if (!shard->init(scheduler_, *ctx_)) + { + JLOG(j_.error()) << "shard " << shardIndex + << " failed to open in historical storage"; + shard->removeOnDestroy(); + shard.reset(); } } else @@ -1740,21 +1690,19 @@ DatabaseShardImp::relocateOutdatedShards( } }; - // See if either of the recent shards - // needs to be updated + // See if either of the recent shards needs to be updated bool const curNotSynched = latestShardIndex_ && *latestShardIndex_ != latestShardIndex; bool const prevNotSynched = secondLatestShardIndex_ && *secondLatestShardIndex_ != latestShardIndex - 1; - // A new shard has been published. Move outdated shards - // to historical storage as needed + // A new shard has been published. 
Move outdated + // shards to historical storage as needed if (curNotSynched || prevNotSynched) { if (prev) { - // Move the formerly second latest shard to - // historical storage + // Move the formerly second latest shard to historical storage if (keepShard(*prev) && separateHistoricalPath) { moveShard(*prev); @@ -1765,19 +1713,16 @@ DatabaseShardImp::relocateOutdatedShards( if (cur) { - // The formerly latest shard is now - // the second latest + // The formerly latest shard is now the second latest if (cur == latestShardIndex - 1) { prev = cur; } - // The formerly latest shard is no - // longer a 'recent' shard + // The formerly latest shard is no longer a 'recent' shard else { - // Move the formerly latest shard to - // historical storage + // Move the formerly latest shard to historical storage if (keepShard(*cur) && separateHistoricalPath) { moveShard(*cur); @@ -1796,9 +1741,8 @@ DatabaseShardImp::prepareForNewShard( std::uint32_t numHistoricalShards, std::lock_guard const& lock) -> boost::optional { - // Any shard earlier than the two most recent shards - // is a historical shard - auto const boundaryIndex = shardBoundaryIndex(lock); + // Any shard earlier than the two most recent shards is a historical shard + auto const boundaryIndex{shardBoundaryIndex()}; auto const isHistoricalShard = shardIndex < boundaryIndex; auto const designation = isHistoricalShard && !historicalPaths_.empty() @@ -1860,10 +1804,8 @@ DatabaseShardImp::checkHistoricalPaths() const { #if BOOST_OS_LINUX // Each historical shard path must correspond - // to a directory on a distinct device or filesystem. - // Currently, this constraint is enforced only on - // Linux. - + // to a directory on a distinct device or file system. + // Currently, this constraint is enforced only on Linux. 
std::unordered_map> filesystemIDs( historicalPaths_.size()); @@ -1884,12 +1826,11 @@ DatabaseShardImp::checkHistoricalPaths() const bool ret = true; for (auto const& entry : filesystemIDs) { - // Check to see if any of the paths - // are stored on the same filesystem + // Check to see if any of the paths are stored on the same file system if (entry.second.size() > 1) { // Two or more historical storage paths - // correspond to the same filesystem. + // correspond to the same file system. JLOG(j_.error()) << "The following paths correspond to the same filesystem: " << boost::algorithm::join(entry.second, ", ") @@ -1904,14 +1845,13 @@ DatabaseShardImp::checkHistoricalPaths() const #else // The requirement that each historical storage path - // corresponds to a distinct device or filesystem is + // corresponds to a distinct device or file system is // enforced only on Linux, so on other platforms // keep track of the available capacities for each // path. Issue a warning if we suspect any of the paths // may violate this requirement. - // Map byte counts to each path that - // shares that byte count. + // Map byte counts to each path that shares that byte count. std::unordered_map> uniqueCapacities(historicalPaths_.size()); @@ -1921,20 +1861,18 @@ DatabaseShardImp::checkHistoricalPaths() const for (auto const& entry : uniqueCapacities) { - // Check to see if any paths have the - // same amount of available bytes. + // Check to see if any paths have the same amount of available bytes. if (entry.second.size() > 1) { // Two or more historical storage paths may - // correspond to the same device or - // filesystem. + // correspond to the same device or file system. JLOG(j_.warn()) << "Each of the following paths have " << entry.first << " bytes free, and may be located on the same device" - " or filesystem: " + " or file system: " << boost::algorithm::join(entry.second, ", ") << ". 
Each configured historical storage path should" - " be on a unique device or filesystem."; + " be on a unique device or file system."; } } #endif diff --git a/src/ripple/nodestore/impl/DatabaseShardImp.h b/src/ripple/nodestore/impl/DatabaseShardImp.h index 934605d59..e35779eda 100644 --- a/src/ripple/nodestore/impl/DatabaseShardImp.h +++ b/src/ripple/nodestore/impl/DatabaseShardImp.h @@ -48,12 +48,10 @@ public: int readThreads, beast::Journal j); - ~DatabaseShardImp() override; - - bool + [[nodiscard]] bool init() override; - boost::optional + [[nodiscard]] boost::optional prepareLedger(std::uint32_t validLedgerSeq) override; bool @@ -70,7 +68,7 @@ public: override; std::shared_ptr - fetchLedger(uint256 const& hash, std::uint32_t seq) override; + fetchLedger(uint256 const& hash, std::uint32_t ledgerSeq) override; void setStored(std::shared_ptr const& ledger) override; @@ -78,9 +76,6 @@ public: std::string getCompleteShards() override; - void - validate() override; - std::uint32_t ledgersPerShard() const override { @@ -94,10 +89,10 @@ public: } std::uint32_t - seqToShardIndex(std::uint32_t seq) const override + seqToShardIndex(std::uint32_t ledgerSeq) const override { - assert(seq >= earliestLedgerSeq()); - return NodeStore::seqToShardIndex(seq, ledgersPerShard_); + assert(ledgerSeq >= earliestLedgerSeq()); + return NodeStore::seqToShardIndex(ledgerSeq, ledgersPerShard_); } std::uint32_t @@ -131,6 +126,9 @@ public: void onStop() override; + void + onChildrenStopped() override; + /** Import the application local node store @param source The application node store. 
@@ -146,22 +144,19 @@ public: NodeObjectType type, Blob&& data, uint256 const& hash, - std::uint32_t seq) override; - - std::shared_ptr - fetch(uint256 const& hash, std::uint32_t seq) override; + std::uint32_t ledgerSeq) override; bool asyncFetch( uint256 const& hash, - std::uint32_t seq, - std::shared_ptr& object) override; + std::uint32_t ledgerSeq, + std::shared_ptr& nodeObject) override; bool storeLedger(std::shared_ptr const& srcLedger) override; int - getDesiredAsyncReadCount(std::uint32_t seq) override; + getDesiredAsyncReadCount(std::uint32_t ledgerSeq) override; float getCacheHitRate() override; @@ -173,26 +168,6 @@ public: sweep() override; private: - struct ShardInfo - { - enum class State { - none, - final, // Immutable, complete and validated - acquire, // Being acquired - import, // Being imported - finalize // Being finalized - }; - - ShardInfo() = default; - ShardInfo(std::shared_ptr shard_, State state_) - : shard(std::move(shard_)), state(state_) - { - } - - std::shared_ptr shard; - State state{State::none}; - }; - enum class PathDesignation : uint8_t { none, // No path specified historical // Needs a historical path @@ -210,7 +185,10 @@ private: std::unique_ptr taskQueue_; // Shards held by this server - std::map shards_; + std::unordered_map> shards_; + + // Shard indexes being imported + std::set preparedIndexes_; // Shard index being acquired from the peer network std::uint32_t acquireIndex_{0}; @@ -247,6 +225,9 @@ private: // Average storage space required by a shard (in bytes) std::uint64_t avgShardFileSz_; + // The limit of final shards with open databases at any time + std::uint32_t const openFinalLimit_; + // File name used to mark shards being imported from node store static constexpr auto importMarker_ = "import"; @@ -263,10 +244,13 @@ private: // Initialize settings from the configuration file // Lock must be held bool - initConfig(std::lock_guard&); + initConfig(std::lock_guard const&); std::shared_ptr - fetchFrom(uint256 const& hash, 
std::uint32_t seq) override; + fetchNodeObject( + uint256 const& hash, + std::uint32_t ledgerSeq, + FetchReport& fetchReport) override; void for_each(std::function)> f) override @@ -279,30 +263,24 @@ private: boost::optional findAcquireIndex( std::uint32_t validLedgerSeq, - std::lock_guard&); + std::lock_guard const&); -private: - // Queue a task to finalize a shard by validating its databases + // Queue a task to finalize a shard by verifying its databases // Lock must be held void finalizeShard( - ShardInfo& shardInfo, + std::shared_ptr& shard, bool writeSQLite, - std::lock_guard&, boost::optional const& expectedHash); // Set storage and file descriptor usage stats - // Lock must NOT be held void setFileStats(); // Update status string // Lock must be held void - updateStatus(std::lock_guard&); - - std::pair, std::shared_ptr> - getCache(std::uint32_t seq); + updateStatus(std::lock_guard const&); // Returns true if the filesystem has enough storage // available to hold the specified number of shards. @@ -317,7 +295,7 @@ private: std::lock_guard const&) const; bool - storeLedgerInShard( + setStoredInShard( std::shared_ptr& shard, std::shared_ptr const& ledger); @@ -327,7 +305,7 @@ private: // Returns the index that represents the logical // partition between historical and recent shards std::uint32_t - shardBoundaryIndex(std::lock_guard const&) const; + shardBoundaryIndex() const; std::uint32_t numHistoricalShards(std::lock_guard const& lock) const; diff --git a/src/ripple/nodestore/impl/Shard.cpp b/src/ripple/nodestore/impl/Shard.cpp index 76c31253d..ae7e2c5f0 100644 --- a/src/ripple/nodestore/impl/Shard.cpp +++ b/src/ripple/nodestore/impl/Shard.cpp @@ -49,6 +49,7 @@ Shard::Shard( boost::filesystem::path const& dir, beast::Journal j) : app_(app) + , j_(j) , index_(index) , firstSeq_(db.firstLedgerSeq(index)) , lastSeq_(std::max(firstSeq_, db.lastLedgerSeq(index))) @@ -56,357 +57,529 @@ Shard::Shard( index == db.earliestShardIndex() ? 
lastSeq_ - firstSeq_ + 1 : db.ledgersPerShard()) , dir_((dir.empty() ? db.getRootDir() : dir) / std::to_string(index_)) - , j_(j) { - if (index_ < db.earliestShardIndex()) - Throw("Shard: Invalid index"); } Shard::~Shard() { - if (removeOnDestroy_) + if (!removeOnDestroy_) + return; + + if (backend_) { + // Abort removal if the backend is in use + if (backendCount_ > 0) + { + JLOG(j_.error()) << "shard " << index_ + << " backend in use, unable to remove directory"; + return; + } + + // Release database files first otherwise remove_all may fail backend_.reset(); lgrSQLiteDB_.reset(); txSQLiteDB_.reset(); acquireInfo_.reset(); + } - try - { - boost::filesystem::remove_all(dir_); - } - catch (std::exception const& e) - { - JLOG(j_.error()) << "shard " << index_ << " exception " << e.what() - << " in function " << __func__; - } + try + { + boost::filesystem::remove_all(dir_); + } + catch (std::exception const& e) + { + JLOG(j_.fatal()) << "shard " << index_ + << ". Exception caught in function " << __func__ + << ". 
Error: " << e.what(); } } bool -Shard::open(Scheduler& scheduler, nudb::context& ctx) +Shard::init(Scheduler& scheduler, nudb::context& context) { - std::lock_guard lock{mutex_}; - assert(!backend_); - - Config const& config{app_.config()}; + Section section{app_.config().section(ConfigSection::shardDatabase())}; + std::string const type{get(section, "type", "nudb")}; + auto const factory{Manager::instance().find(type)}; + if (!factory) { - Section section{config.section(ConfigSection::shardDatabase())}; - std::string const type{get(section, "type", "nudb")}; - auto factory{Manager::instance().find(type)}; - if (!factory) - { - JLOG(j_.error()) << "shard " << index_ - << " failed to create backend type " << type; - return false; - } + JLOG(j_.error()) << "shard " << index_ << " failed to find factory for " + << type; + return false; + } + section.set("path", dir_.string()); - section.set("path", dir_.string()); - backend_ = factory->createInstance( - NodeObject::keyBytes, section, scheduler, ctx, j_); + std::lock_guard lock{mutex_}; + if (backend_) + { + JLOG(j_.error()) << "shard " << index_ << " already initialized"; + return false; + } + backend_ = factory->createInstance( + NodeObject::keyBytes, section, scheduler, context, j_); + + return open(lock); +} + +bool +Shard::isOpen() const +{ + std::lock_guard lock(mutex_); + if (!backend_) + { + JLOG(j_.error()) << "shard " << index_ << " not initialized"; + return false; } - using namespace boost::filesystem; - auto preexist{false}; - auto fail = [this, &preexist](std::string const& msg) { - pCache_.reset(); - nCache_.reset(); - backend_.reset(); - lgrSQLiteDB_.reset(); - txSQLiteDB_.reset(); - acquireInfo_.reset(); + return backend_->isOpen(); +} - if (!preexist) - remove_all(dir_); - - if (!msg.empty()) - { - JLOG(j_.fatal()) << "shard " << index_ << " " << msg; - } +bool +Shard::tryClose() +{ + // Keep database open if being acquired or finalized + if (state_ != final) return false; - }; - auto 
createAcquireInfo = [this, &config]() { - acquireInfo_ = std::make_unique(); + std::lock_guard lock(mutex_); - DatabaseCon::Setup setup; - setup.startUp = config.START_UP; - setup.standAlone = config.standalone(); - setup.dataDir = dir_; - setup.useGlobalPragma = true; + // Keep database open if in use + if (backendCount_ > 0) + return false; - acquireInfo_->SQLiteDB = std::make_unique( - setup, - AcquireShardDBName, - AcquireShardDBPragma, - AcquireShardDBInit, - DatabaseCon::CheckpointerSetup{&app_.getJobQueue(), &app_.logs()}); - }; + if (!backend_) + { + JLOG(j_.error()) << "shard " << index_ << " not initialized"; + return false; + } + if (!backend_->isOpen()) + return false; try { - // Open or create the NuDB key/value store - preexist = exists(dir_); - backend_->open(!preexist); - - if (!preexist) - { - // A new shard - createAcquireInfo(); - acquireInfo_->SQLiteDB->getSession() - << "INSERT INTO Shard (ShardIndex) " - "VALUES (:shardIndex);", - soci::use(index_); - } - else if (exists(dir_ / AcquireShardDBName)) - { - // An incomplete shard, being acquired - createAcquireInfo(); - - auto& session{acquireInfo_->SQLiteDB->getSession()}; - boost::optional index; - soci::blob sociBlob(session); - soci::indicator blobPresent; - - session << "SELECT ShardIndex, StoredLedgerSeqs " - "FROM Shard " - "WHERE ShardIndex = :index;", - soci::into(index), soci::into(sociBlob, blobPresent), - soci::use(index_); - - if (!index || index != index_) - return fail("invalid acquire SQLite database"); - - if (blobPresent == soci::i_ok) - { - std::string s; - auto& storedSeqs{acquireInfo_->storedSeqs}; - if (convert(sociBlob, s); !from_string(storedSeqs, s)) - return fail("invalid StoredLedgerSeqs"); - - if (boost::icl::first(storedSeqs) < firstSeq_ || - boost::icl::last(storedSeqs) > lastSeq_) - { - return fail("invalid StoredLedgerSeqs"); - } - - if (boost::icl::length(storedSeqs) == maxLedgers_) - // All ledgers have been acquired, shard backend is complete - backendComplete_ 
= true; - } - } - else - { - // A finalized shard or has all ledgers stored in the backend - std::shared_ptr nObj; - if (backend_->fetch(finalKey.data(), &nObj) != Status::ok) - { - legacy_ = true; - return fail("incompatible, missing backend final key"); - } - - // Check final key's value - SerialIter sIt(nObj->getData().data(), nObj->getData().size()); - if (sIt.get32() != version) - return fail("invalid version"); - - if (sIt.get32() != firstSeq_ || sIt.get32() != lastSeq_) - return fail("out of range ledger sequences"); - - if (sIt.get256().isZero()) - return fail("invalid last ledger hash"); - - if (exists(dir_ / LgrDBName) && exists(dir_ / TxDBName)) - final_ = true; - - backendComplete_ = true; - } + backend_->close(); } catch (std::exception const& e) { - return fail( - std::string("exception ") + e.what() + " in function " + __func__); + JLOG(j_.fatal()) << "shard " << index_ + << ". Exception caught in function " << __func__ + << ". Error: " << e.what(); + return false; } - // Set backend caches - { - auto const size{config.getValueFor(SizedItem::nodeCacheSize, 0)}; - auto const age{std::chrono::seconds{ - config.getValueFor(SizedItem::nodeCacheAge, 0)}}; - auto const name{"shard " + std::to_string(index_)}; - - pCache_ = std::make_shared(name, size, age, stopwatch(), j_); - nCache_ = std::make_shared(name, stopwatch(), size, age); - } - - if (!initSQLite(lock)) - return fail({}); - - setFileStats(lock); - return true; -} - -void -Shard::closeAll() -{ - backend_.reset(); lgrSQLiteDB_.reset(); txSQLiteDB_.reset(); acquireInfo_.reset(); + + // Reset caches to reduce memory use + pCache_->reset(); + nCache_->reset(); + app_.getShardFamily()->getFullBelowCache(lastSeq_)->reset(); + app_.getShardFamily()->getTreeNodeCache(lastSeq_)->reset(); + + return true; } boost::optional Shard::prepare() { - std::lock_guard lock(mutex_); - assert(backend_); - - if (backendComplete_) + if (state_ != acquire) { JLOG(j_.warn()) << "shard " << index_ - << " prepare called 
when shard backend is complete"; - return {}; + << " prepare called when not acquiring"; + return boost::none; } - assert(acquireInfo_); - auto const& storedSeqs{acquireInfo_->storedSeqs}; - if (storedSeqs.empty()) + std::lock_guard lock(mutex_); + if (!acquireInfo_) + { + JLOG(j_.error()) << "shard " << index_ + << " missing acquire SQLite database"; + return boost::none; + } + + if (acquireInfo_->storedSeqs.empty()) return lastSeq_; - return prevMissing(storedSeqs, 1 + lastSeq_, firstSeq_); + return prevMissing(acquireInfo_->storedSeqs, 1 + lastSeq_, firstSeq_); } bool -Shard::store(std::shared_ptr const& ledger) +Shard::storeNodeObject(std::shared_ptr const& nodeObject) { - auto const seq{ledger->info().seq}; - if (seq < firstSeq_ || seq > lastSeq_) + if (state_ != acquire) + { + // The import node store case is an exception + if (nodeObject->getHash() != finalKey) + { + // Ignore residual calls from InboundLedgers + JLOG(j_.trace()) << "shard " << index_ << " not acquiring"; + return false; + } + } + + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return false; + + pCache_->canonicalize_replace_cache(nodeObject->getHash(), nodeObject); + + try + { + backend_->store(nodeObject); + } + catch (std::exception const& e) + { + JLOG(j_.fatal()) << "shard " << index_ + << ". Exception caught in function " << __func__ + << ". 
Error: " << e.what(); + return false; + } + + nCache_->erase(nodeObject->getHash()); + return true; +} + +std::shared_ptr +Shard::fetchNodeObject(uint256 const& hash, FetchReport& fetchReport) +{ + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return nullptr; + + // See if the node object exists in the cache + auto nodeObject{pCache_->fetch(hash)}; + if (!nodeObject && !nCache_->touch_if_exists(hash)) + { + // Try the backend + fetchReport.wentToDisk = true; + + Status status; + try + { + status = backend_->fetch(hash.data(), &nodeObject); + } + catch (std::exception const& e) + { + JLOG(j_.fatal()) + << "shard " << index_ << ". Exception caught in function " + << __func__ << ". Error: " << e.what(); + return nullptr; + } + + switch (status) + { + case ok: + case notFound: + break; + case dataCorrupt: { + JLOG(j_.fatal()) + << "shard " << index_ << ". Corrupt node object at hash " + << to_string(hash); + break; + } + default: { + JLOG(j_.warn()) + << "shard " << index_ << ". 
Unknown status=" << status + << " fetching node object at hash " << to_string(hash); + break; + } + } + + if (!nodeObject) + { + // Just in case a write occurred + nodeObject = pCache_->fetch(hash); + if (!nodeObject) + // We give up + nCache_->insert(hash); + } + else + { + // Ensure all threads get the same object + pCache_->canonicalize_replace_client(hash, nodeObject); + fetchReport.wasFound = true; + + // Since this was a 'hard' fetch, we will log it + JLOG(j_.trace()) << "HOS: " << hash << " fetch: in shard db"; + } + } + + return nodeObject; +} + +bool +Shard::fetchNodeObjectFromCache( + uint256 const& hash, + std::shared_ptr& nodeObject) +{ + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return false; + + nodeObject = pCache_->fetch(hash); + if (nodeObject || nCache_->touch_if_exists(hash)) + return true; + return false; +} + +Shard::StoreLedgerResult +Shard::storeLedger( + std::shared_ptr const& srcLedger, + std::shared_ptr const& next) +{ + StoreLedgerResult result; + if (state_ != acquire) + { + // Ignore residual calls from InboundLedgers + JLOG(j_.trace()) << "shard " << index_ << ". Not acquiring"; + return result; + } + if (containsLedger(srcLedger->info().seq)) + { + JLOG(j_.trace()) << "shard " << index_ << ". Ledger already stored"; + return result; + } + + auto fail = [&](std::string const& msg) { + JLOG(j_.error()) << "shard " << index_ << ". Source ledger sequence " + << srcLedger->info().seq << ". 
" << msg; + result.error = true; + return result; + }; + + if (srcLedger->info().hash.isZero()) + return fail("Invalid hash"); + if (srcLedger->info().accountHash.isZero()) + return fail("Invalid account hash"); + + auto& srcDB{const_cast(srcLedger->stateMap().family().db())}; + if (&srcDB == &(app_.getShardFamily()->db())) + return fail("Source and destination databases are the same"); + + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return fail("Failed to lock backend"); + + Batch batch; + batch.reserve(batchWritePreallocationSize); + auto storeBatch = [&]() { + std::uint64_t sz{0}; + for (auto const& nodeObject : batch) + { + pCache_->canonicalize_replace_cache( + nodeObject->getHash(), nodeObject); + nCache_->erase(nodeObject->getHash()); + sz += nodeObject->getData().size(); + } + + try + { + backend_->storeBatch(batch); + } + catch (std::exception const& e) + { + fail( + std::string(". Exception caught in function ") + __func__ + + ". Error: " + e.what()); + return false; + } + + result.count += batch.size(); + result.size += sz; + batch.clear(); + return true; + }; + + // Store ledger header + { + Serializer s(sizeof(std::uint32_t) + sizeof(LedgerInfo)); + s.add32(HashPrefix::ledgerMaster); + addRaw(srcLedger->info(), s); + auto nodeObject = NodeObject::createObject( + hotLEDGER, std::move(s.modData()), srcLedger->info().hash); + batch.emplace_back(std::move(nodeObject)); + } + + bool error = false; + auto visit = [&](SHAMapAbstractNode& node) { + if (!stop_) + { + if (auto nodeObject = srcDB.fetchNodeObject( + node.getNodeHash().as_uint256(), srcLedger->info().seq)) + { + batch.emplace_back(std::move(nodeObject)); + if (batch.size() < batchWritePreallocationSize || storeBatch()) + return true; + } + } + + error = true; + return false; + }; + + // Store the state map + if (srcLedger->stateMap().getHash().isNonZero()) + { + if (!srcLedger->stateMap().isValid()) + return fail("Invalid state map"); + + if (next && next->info().parentHash == 
srcLedger->info().hash) + { + auto have = next->stateMap().snapShot(false); + srcLedger->stateMap().snapShot(false)->visitDifferences( + &(*have), visit); + } + else + srcLedger->stateMap().snapShot(false)->visitNodes(visit); + if (error) + return fail("Failed to store state map"); + } + + // Store the transaction map + if (srcLedger->info().txHash.isNonZero()) + { + if (!srcLedger->txMap().isValid()) + return fail("Invalid transaction map"); + + srcLedger->txMap().snapShot(false)->visitNodes(visit); + if (error) + return fail("Failed to store transaction map"); + } + + if (!batch.empty() && !storeBatch()) + return fail("Failed to store"); + + return result; +} + +bool +Shard::setLedgerStored(std::shared_ptr const& ledger) +{ + if (state_ != acquire) + { + // Ignore residual calls from InboundLedgers + JLOG(j_.trace()) << "shard " << index_ << " not acquiring"; + return false; + } + + auto const ledgerSeq{ledger->info().seq}; + if (ledgerSeq < firstSeq_ || ledgerSeq > lastSeq_) { JLOG(j_.error()) << "shard " << index_ << " invalid ledger sequence " - << seq; + << ledgerSeq; return false; } std::lock_guard lock(mutex_); - assert(backend_); - - if (backendComplete_) + if (!acquireInfo_) { - JLOG(j_.debug()) << "shard " << index_ << " ledger sequence " << seq - << " already stored"; - return true; + JLOG(j_.error()) << "shard " << index_ + << " missing acquire SQLite database"; + return false; } - - assert(acquireInfo_); - auto& storedSeqs{acquireInfo_->storedSeqs}; - if (boost::icl::contains(storedSeqs, seq)) + if (boost::icl::contains(acquireInfo_->storedSeqs, ledgerSeq)) { - JLOG(j_.debug()) << "shard " << index_ << " ledger sequence " << seq - << " already stored"; + // Ignore redundant calls + JLOG(j_.debug()) << "shard " << index_ << " ledger sequence " + << ledgerSeq << " already stored"; return true; } // storeSQLite looks at storedSeqs so insert before the call - storedSeqs.insert(seq); + acquireInfo_->storedSeqs.insert(ledgerSeq); if (!storeSQLite(ledger, 
lock)) return false; - if (boost::icl::length(storedSeqs) >= maxLedgers_) + if (boost::icl::length(acquireInfo_->storedSeqs) >= maxLedgers_) { if (!initSQLite(lock)) return false; - backendComplete_ = true; + state_ = complete; } - JLOG(j_.debug()) << "shard " << index_ << " stored ledger sequence " << seq - << (backendComplete_ ? " . All ledgers stored" : ""); + JLOG(j_.debug()) << "shard " << index_ << " stored ledger sequence " + << ledgerSeq; setFileStats(lock); return true; } bool -Shard::containsLedger(std::uint32_t seq) const +Shard::containsLedger(std::uint32_t ledgerSeq) const { - if (seq < firstSeq_ || seq > lastSeq_) + if (ledgerSeq < firstSeq_ || ledgerSeq > lastSeq_) return false; - - std::lock_guard lock(mutex_); - if (backendComplete_) + if (state_ != acquire) return true; - assert(acquireInfo_); - return boost::icl::contains(acquireInfo_->storedSeqs, seq); + std::lock_guard lock(mutex_); + if (!acquireInfo_) + { + JLOG(j_.error()) << "shard " << index_ + << " missing acquire SQLite database"; + return false; + } + return boost::icl::contains(acquireInfo_->storedSeqs, ledgerSeq); } void Shard::sweep() { - std::lock_guard lock(mutex_); - assert(pCache_ && nCache_); + boost::optional scopedCount; + { + std::lock_guard lock(mutex_); + if (!backend_ || !backend_->isOpen()) + { + JLOG(j_.error()) << "shard " << index_ << " not initialized"; + return; + } + + scopedCount.emplace(&backendCount_); + } pCache_->sweep(); nCache_->sweep(); } -std::tuple< - std::shared_ptr, - std::shared_ptr, - std::shared_ptr> -Shard::getBackendAll() const +int +Shard::getDesiredAsyncReadCount() { - std::lock_guard lock(mutex_); - assert(backend_); - - return {backend_, pCache_, nCache_}; + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return 0; + return pCache_->getTargetSize() / asyncDivider; } -std::shared_ptr -Shard::getBackend() const +float +Shard::getCacheHitRate() { - std::lock_guard lock(mutex_); - assert(backend_); - - return backend_; + auto const 
scopedCount{makeBackendCount()}; + if (!scopedCount) + return 0; + return pCache_->getHitRate(); } -bool -Shard::isBackendComplete() const +std::chrono::steady_clock::time_point +Shard::getLastUse() const { std::lock_guard lock(mutex_); - return backendComplete_; -} - -std::shared_ptr -Shard::pCache() const -{ - std::lock_guard lock(mutex_); - assert(pCache_); - - return pCache_; -} - -std::shared_ptr -Shard::nCache() const -{ - std::lock_guard lock(mutex_); - assert(nCache_); - - return nCache_; + return lastAccess_; } std::pair -Shard::fileInfo() const +Shard::getFileInfo() const { std::lock_guard lock(mutex_); return {fileSz_, fdRequired_}; } -bool -Shard::isFinal() const +std::int32_t +Shard::getWriteLoad() { - std::lock_guard lock(mutex_); - return final_; + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return 0; + return backend_->getWriteLoad(); } bool @@ -421,27 +594,26 @@ Shard::finalize( bool const writeSQLite, boost::optional const& expectedHash) { - assert(backend_); - - if (stop_) - return false; - uint256 hash{0}; - std::uint32_t seq{0}; + std::uint32_t ledgerSeq{0}; auto fail = - [j = j_, index = index_, &hash, &seq](std::string const& msg) { + [j = j_, index = index_, &hash, &ledgerSeq](std::string const& msg) { JLOG(j.fatal()) << "shard " << index << ". " << msg << (hash.isZero() ? "" : ". Ledger hash " + to_string(hash)) - << (seq == 0 ? "" : ". Ledger sequence " + std::to_string(seq)); + << (ledgerSeq == 0 + ? "" + : ". 
Ledger sequence " + std::to_string(ledgerSeq)); return false; }; + auto const scopedCount{makeBackendCount()}; + if (!scopedCount) + return false; + try { - std::unique_lock lock(mutex_); - if (!backendComplete_) - return fail("backend incomplete"); + state_ = finalizing; /* TODO MP @@ -455,12 +627,12 @@ Shard::finalize( */ // Check if a final key has been stored - lock.unlock(); - if (std::shared_ptr nObj; - backend_->fetch(finalKey.data(), &nObj) == Status::ok) + if (std::shared_ptr nodeObject; + backend_->fetch(finalKey.data(), &nodeObject) == Status::ok) { // Check final key's value - SerialIter sIt(nObj->getData().data(), nObj->getData().size()); + SerialIter sIt( + nodeObject->getData().data(), nodeObject->getData().size()); if (sIt.get32() != version) return fail("invalid version"); @@ -474,7 +646,6 @@ Shard::finalize( { // In the absence of a final key, an acquire SQLite database // must be present in order to validate the shard - lock.lock(); if (!acquireInfo_) return fail("missing acquire SQLite database"); @@ -489,14 +660,13 @@ Shard::finalize( soci::into(index), soci::into(sHash), soci::into(sociBlob, blobPresent), soci::use(index_); - lock.unlock(); if (!index || index != index_) return fail("missing or invalid ShardIndex"); if (!sHash) return fail("missing LastLedgerHash"); - if (hash.SetHexExact(*sHash); hash.isZero()) + if (!hash.SetHexExact(*sHash) || hash.isZero()) return fail("invalid LastLedgerHash"); if (blobPresent != soci::i_ok) @@ -505,8 +675,6 @@ Shard::finalize( std::string s; convert(sociBlob, s); - lock.lock(); - auto& storedSeqs{acquireInfo_->storedSeqs}; if (!from_string(storedSeqs, s) || boost::icl::first(storedSeqs) != firstSeq_ || @@ -520,7 +688,8 @@ Shard::finalize( catch (std::exception const& e) { return fail( - std::string("exception ") + e.what() + " in function " + __func__); + std::string(". Exception caught in function ") + __func__ + + ". 
Error: " + e.what()); } // Validate the last ledger hash of a downloaded shard @@ -529,6 +698,7 @@ Shard::finalize( return fail("invalid last ledger hash"); // Validate every ledger stored in the backend + Config const& config{app_.config()}; std::shared_ptr ledger; std::shared_ptr next; auto const lastLedgerHash{hash}; @@ -544,28 +714,28 @@ Shard::finalize( // Start with the last ledger in the shard and walk backwards from // child to parent until we reach the first ledger - seq = lastSeq_; - while (seq >= firstSeq_) + ledgerSeq = lastSeq_; + while (ledgerSeq >= firstSeq_) { if (stop_) return false; - auto nObj = valFetch(hash); - if (!nObj) + auto nodeObject{verifyFetch(hash)}; + if (!nodeObject) return fail("invalid ledger"); ledger = std::make_shared( - deserializePrefixedHeader(makeSlice(nObj->getData())), - app_.config(), + deserializePrefixedHeader(makeSlice(nodeObject->getData())), + config, shardFamily); - if (ledger->info().seq != seq) + if (ledger->info().seq != ledgerSeq) return fail("invalid ledger sequence"); if (ledger->info().hash != hash) return fail("invalid ledger hash"); - ledger->stateMap().setLedgerSeq(seq); - ledger->txMap().setLedgerSeq(seq); - ledger->setImmutable(app_.config()); + ledger->stateMap().setLedgerSeq(ledgerSeq); + ledger->txMap().setLedgerSeq(ledgerSeq); + ledger->setImmutable(config); if (!ledger->stateMap().fetchRoot( SHAMapHash{ledger->info().accountHash}, nullptr)) { @@ -578,7 +748,7 @@ Shard::finalize( return fail("missing root TXN node"); } - if (!valLedger(ledger, next)) + if (!verifyLedger(ledger, next)) return fail("failed to validate ledger"); if (writeSQLite) @@ -590,7 +760,7 @@ Shard::finalize( hash = ledger->info().parentHash; next = std::move(ledger); - --seq; + --ledgerSeq; pCache_->reset(); nCache_->reset(); @@ -628,8 +798,9 @@ Shard::finalize( } catch (std::exception const& e) { - return fail(std::string("exception ") + - e.what() + " in function " + __func__); + return fail( + std::string(". 
Exception caught in function ") + __func__ + + ". Error: " + e.what()); } */ @@ -639,44 +810,199 @@ Shard::finalize( s.add32(firstSeq_); s.add32(lastSeq_); s.addBitString(lastLedgerHash); - auto nObj{ + auto nodeObject{ NodeObject::createObject(hotUNKNOWN, std::move(s.modData()), finalKey)}; try { - backend_->store(nObj); + backend_->store(nodeObject); + std::lock_guard lock(mutex_); - // Remove the acquire SQLite database if present + // Remove the acquire SQLite database if (acquireInfo_) + { acquireInfo_.reset(); - remove_all(dir_ / AcquireShardDBName); + remove_all(dir_ / AcquireShardDBName); + } if (!initSQLite(lock)) return fail("failed to initialize SQLite databases"); setFileStats(lock); - final_ = true; + lastAccess_ = std::chrono::steady_clock::now(); } catch (std::exception const& e) { return fail( - std::string("exception ") + e.what() + " in function " + __func__); + std::string(". Exception caught in function ") + __func__ + + ". Error: " + e.what()); } + state_ = final; return true; } bool -Shard::initSQLite(std::lock_guard const&) +Shard::open(std::lock_guard const& lock) +{ + using namespace boost::filesystem; + Config const& config{app_.config()}; + auto preexist{false}; + auto fail = [this, &preexist](std::string const& msg) { + backend_->close(); + lgrSQLiteDB_.reset(); + txSQLiteDB_.reset(); + acquireInfo_.reset(); + + pCache_.reset(); + nCache_.reset(); + + state_ = acquire; + + if (!preexist) + remove_all(dir_); + + if (!msg.empty()) + { + JLOG(j_.fatal()) << "shard " << index_ << " " << msg; + } + return false; + }; + auto createAcquireInfo = [this, &config]() { + acquireInfo_ = std::make_unique(); + + DatabaseCon::Setup setup; + setup.startUp = config.standalone() ? 
config.LOAD : config.START_UP; + setup.standAlone = config.standalone(); + setup.dataDir = dir_; + setup.useGlobalPragma = true; + + acquireInfo_->SQLiteDB = std::make_unique( + setup, + AcquireShardDBName, + AcquireShardDBPragma, + AcquireShardDBInit, + DatabaseCon::CheckpointerSetup{&app_.getJobQueue(), &app_.logs()}); + state_ = acquire; + }; + + try + { + // Open or create the NuDB key/value store + preexist = exists(dir_); + backend_->open(!preexist); + + if (!preexist) + { + // A new shard + createAcquireInfo(); + acquireInfo_->SQLiteDB->getSession() + << "INSERT INTO Shard (ShardIndex) " + "VALUES (:shardIndex);", + soci::use(index_); + } + else if (exists(dir_ / AcquireShardDBName)) + { + // A shard being acquired, backend is likely incomplete + createAcquireInfo(); + + auto& session{acquireInfo_->SQLiteDB->getSession()}; + boost::optional index; + soci::blob sociBlob(session); + soci::indicator blobPresent; + + session << "SELECT ShardIndex, StoredLedgerSeqs " + "FROM Shard " + "WHERE ShardIndex = :index;", + soci::into(index), soci::into(sociBlob, blobPresent), + soci::use(index_); + + if (!index || index != index_) + return fail("invalid acquire SQLite database"); + + if (blobPresent == soci::i_ok) + { + std::string s; + auto& storedSeqs{acquireInfo_->storedSeqs}; + if (convert(sociBlob, s); !from_string(storedSeqs, s)) + return fail("invalid StoredLedgerSeqs"); + + if (boost::icl::first(storedSeqs) < firstSeq_ || + boost::icl::last(storedSeqs) > lastSeq_) + { + return fail("invalid StoredLedgerSeqs"); + } + + // Check if backend is complete + if (boost::icl::length(storedSeqs) == maxLedgers_) + state_ = complete; + } + } + else + { + // A shard that is final or its backend is complete + // and ready to be finalized + std::shared_ptr nodeObject; + if (backend_->fetch(finalKey.data(), &nodeObject) != Status::ok) + { + legacy_ = true; + return fail("incompatible, missing backend final key"); + } + + // Check final key's value + SerialIter sIt( + 
nodeObject->getData().data(), nodeObject->getData().size()); + if (sIt.get32() != version) + return fail("invalid version"); + + if (sIt.get32() != firstSeq_ || sIt.get32() != lastSeq_) + return fail("out of range ledger sequences"); + + if (sIt.get256().isZero()) + return fail("invalid last ledger hash"); + + if (exists(dir_ / LgrDBName) && exists(dir_ / TxDBName)) + { + lastAccess_ = std::chrono::steady_clock::now(); + state_ = final; + } + else + state_ = complete; + } + } + catch (std::exception const& e) + { + return fail( + std::string(". Exception caught in function ") + __func__ + + ". Error: " + e.what()); + } + + // Set backend caches + auto const size{config.getValueFor(SizedItem::nodeCacheSize, 0)}; + auto const age{ + std::chrono::seconds{config.getValueFor(SizedItem::nodeCacheAge, 0)}}; + auto const name{"shard " + std::to_string(index_)}; + pCache_ = std::make_unique(name, size, age, stopwatch(), j_); + nCache_ = std::make_unique(name, stopwatch(), size, age); + + if (!initSQLite(lock)) + return fail({}); + + setFileStats(lock); + return true; +} + +bool +Shard::initSQLite(std::lock_guard const&) { Config const& config{app_.config()}; DatabaseCon::Setup const setup = [&]() { - DatabaseCon::Setup result; - result.startUp = config.START_UP; - result.standAlone = config.standalone(); - result.dataDir = dir_; - result.useGlobalPragma = !backendComplete_; - return result; + DatabaseCon::Setup setup; + setup.startUp = config.standalone() ? 
config.LOAD : config.START_UP; + setup.standAlone = config.standalone(); + setup.dataDir = dir_; + setup.useGlobalPragma = (state_ != complete); + return setup; }(); try @@ -687,7 +1013,7 @@ Shard::initSQLite(std::lock_guard const&) if (txSQLiteDB_) txSQLiteDB_.reset(); - if (backendComplete_) + if (state_ != acquire) { lgrSQLiteDB_ = std::make_unique( setup, LgrDBName, CompleteShardDBPragma, LgrDBInit); @@ -731,22 +1057,24 @@ Shard::initSQLite(std::lock_guard const&) } catch (std::exception const& e) { - JLOG(j_.fatal()) << "shard " << index_ << " exception " << e.what() - << " in function " << __func__; + JLOG(j_.fatal()) << "shard " << index_ + << ". Exception caught in function " << __func__ + << ". Error: " << e.what(); return false; } + return true; } bool Shard::storeSQLite( std::shared_ptr const& ledger, - std::lock_guard const&) + std::lock_guard const&) { if (stop_) return false; - auto const seq{ledger->info().seq}; + auto const ledgerSeq{ledger->info().seq}; try { @@ -757,14 +1085,14 @@ Shard::storeSQLite( session << "DELETE FROM Transactions " "WHERE LedgerSeq = :seq;", - soci::use(seq); + soci::use(ledgerSeq); session << "DELETE FROM AccountTransactions " "WHERE LedgerSeq = :seq;", - soci::use(seq); + soci::use(ledgerSeq); if (ledger->info().txHash.isNonZero()) { - auto const sSeq{std::to_string(seq)}; + auto const sSeq{std::to_string(ledgerSeq)}; if (!ledger->txMap().isValid()) { JLOG(j_.error()) << "shard " << index_ @@ -826,7 +1154,7 @@ Shard::storeSQLite( session << (STTx::getMetaSQLInsertReplaceHeader() + item.first->getMetaSQL( - seq, sqlEscape(s.modData())) + + ledgerSeq, sqlEscape(s.modData())) + ';'); } } @@ -848,7 +1176,7 @@ Shard::storeSQLite( session << "DELETE FROM Ledgers " "WHERE LedgerSeq = :seq;", - soci::use(seq); + soci::use(ledgerSeq); session << "INSERT OR REPLACE INTO Ledgers (" "LedgerHash, LedgerSeq, PrevHash, TotalCoins, ClosingTime," @@ -858,7 +1186,7 @@ Shard::storeSQLite( ":ledgerHash, :ledgerSeq, :prevHash, :totalCoins," 
":closingTime, :prevClosingTime, :closeTimeRes," ":closeFlags, :accountSetHash, :transSetHash);", - soci::use(sHash), soci::use(seq), soci::use(sParentHash), + soci::use(sHash), soci::use(ledgerSeq), soci::use(sParentHash), soci::use(sDrops), soci::use(ledger->info().closeTime.time_since_epoch().count()), soci::use( @@ -899,15 +1227,17 @@ Shard::storeSQLite( } catch (std::exception const& e) { - JLOG(j_.fatal()) << "shard " << index_ << " exception " << e.what() - << " in function " << __func__; + JLOG(j_.fatal()) << "shard " << index_ + << ". Exception caught in function " << __func__ + << ". Error: " << e.what(); return false; } + return true; } void -Shard::setFileStats(std::lock_guard const&) +Shard::setFileStats(std::lock_guard const&) { fileSz_ = 0; fdRequired_ = 0; @@ -925,18 +1255,19 @@ Shard::setFileStats(std::lock_guard const&) } catch (std::exception const& e) { - JLOG(j_.error()) << "shard " << index_ << " exception " << e.what() - << " in function " << __func__; + JLOG(j_.fatal()) << "shard " << index_ + << ". Exception caught in function " << __func__ + << ". Error: " << e.what(); } } bool -Shard::valLedger( +Shard::verifyLedger( std::shared_ptr const& ledger, std::shared_ptr const& next) const { auto fail = [j = j_, index = index_, &ledger](std::string const& msg) { - JLOG(j.fatal()) << "shard " << index << ". " << msg + JLOG(j.error()) << "shard " << index << ". " << msg << (ledger->info().hash.isZero() ? "" : ". Ledger hash " + to_string(ledger->info().hash)) @@ -955,7 +1286,7 @@ Shard::valLedger( auto visit = [this, &error](SHAMapAbstractNode& node) { if (stop_) return false; - if (!valFetch(node.getNodeHash().as_uint256())) + if (!verifyFetch(node.getNodeHash().as_uint256())) error = true; return !error; }; @@ -976,9 +1307,10 @@ Shard::valLedger( catch (std::exception const& e) { return fail( - std::string("exception ") + e.what() + " in function " + - __func__); + std::string(". Exception caught in function ") + __func__ + + ". 
Error: " + e.what()); } + if (stop_) return false; if (error) @@ -998,8 +1330,8 @@ Shard::valLedger( catch (std::exception const& e) { return fail( - std::string("exception ") + e.what() + " in function " + - __func__); + std::string(". Exception caught in function ") + __func__ + + ". Error: " + e.what()); } if (stop_) return false; @@ -1011,26 +1343,27 @@ Shard::valLedger( } std::shared_ptr -Shard::valFetch(uint256 const& hash) const +Shard::verifyFetch(uint256 const& hash) const { - std::shared_ptr nObj; - auto fail = [j = j_, index = index_, &hash, &nObj](std::string const& msg) { - JLOG(j.fatal()) << "shard " << index << ". " << msg - << ". Node object hash " << to_string(hash); - nObj.reset(); - return nObj; - }; + std::shared_ptr nodeObject; + auto fail = + [j = j_, index = index_, &hash, &nodeObject](std::string const& msg) { + JLOG(j.error()) << "shard " << index << ". " << msg + << ". Node object hash " << to_string(hash); + nodeObject.reset(); + return nodeObject; + }; try { - switch (backend_->fetch(hash.data(), &nObj)) + switch (backend_->fetch(hash.data(), &nodeObject)) { case ok: - // This verifies that the hash of node object matches the - // payload - if (nObj->getHash() != sha512Half(makeSlice(nObj->getData()))) + // Verify that the hash of node object matches the payload + if (nodeObject->getHash() != + sha512Half(makeSlice(nodeObject->getData()))) return fail("Node object hash does not match payload"); - return nObj; + return nodeObject; case notFound: return fail("Missing node object"); case dataCorrupt: @@ -1042,9 +1375,33 @@ Shard::valFetch(uint256 const& hash) const catch (std::exception const& e) { return fail( - std::string("exception ") + e.what() + " in function " + __func__); + std::string(". Exception caught in function ") + __func__ + + ". 
Error: " + e.what()); } } +Shard::Count +Shard::makeBackendCount() +{ + if (stop_) + return {nullptr}; + + std::lock_guard lock(mutex_); + if (!backend_) + { + JLOG(j_.error()) << "shard " << index_ << " not initialized"; + return {nullptr}; + } + if (!backend_->isOpen()) + { + if (!open(lock)) + return {nullptr}; + } + else if (state_ == final) + lastAccess_ = std::chrono::steady_clock::now(); + + return Shard::Count(&backendCount_); +} + } // namespace NodeStore } // namespace ripple diff --git a/src/ripple/nodestore/impl/Shard.h b/src/ripple/nodestore/impl/Shard.h index 55fb348cd..ec01134e0 100644 --- a/src/ripple/nodestore/impl/Shard.h +++ b/src/ripple/nodestore/impl/Shard.h @@ -51,6 +51,18 @@ class DatabaseShard; class Shard final { public: + enum class State { + acquire, // Being acquired + complete, // Backend contains all ledgers but is not yet final + finalizing, // Being finalized + final // Database verified, shard is immutable + }; + + static constexpr State acquire = State::acquire; + static constexpr State complete = State::complete; + static constexpr State finalizing = State::finalizing; + static constexpr State final = State::final; + Shard( Application& app, DatabaseShard const& db, @@ -66,70 +78,114 @@ public: ~Shard(); + /** Initialize shard. + + @param scheduler The scheduler to use for performing asynchronous tasks. + @param context The context to use for the backend. + */ + [[nodiscard]] bool + init(Scheduler& scheduler, nudb::context& context); + + /** Returns true if the database are open. + */ + [[nodiscard]] bool + isOpen() const; + + /** Try to close databases if not in use. + + @return true if databases were closed. + */ bool - open(Scheduler& scheduler, nudb::context& ctx); + tryClose(); + /** Notify shard to prepare for shutdown. 
+ */ void - closeAll(); + stop() + { + stop_ = true; + } - boost::optional + [[nodiscard]] boost::optional prepare(); - bool - store(std::shared_ptr const& ledger); + [[nodiscard]] bool + storeNodeObject(std::shared_ptr const& nodeObject); - bool - containsLedger(std::uint32_t seq) const; + [[nodiscard]] std::shared_ptr + fetchNodeObject(uint256 const& hash, FetchReport& fetchReport); + + [[nodiscard]] bool + fetchNodeObjectFromCache( + uint256 const& hash, + std::shared_ptr& nodeObject); + + /** Store a ledger. + + @param srcLedger The ledger to store. + @param next The ledger that immediately follows srcLedger, can be null. + @return StoreLedgerResult containing data about the store. + */ + struct StoreLedgerResult + { + std::uint64_t count{0}; // Number of storage calls + std::uint64_t size{0}; // Number of bytes stored + bool error{false}; + }; + + [[nodiscard]] StoreLedgerResult + storeLedger( + std::shared_ptr const& srcLedger, + std::shared_ptr const& next); + + [[nodiscard]] bool + setLedgerStored(std::shared_ptr const& ledger); + + [[nodiscard]] bool + containsLedger(std::uint32_t ledgerSeq) const; void sweep(); - std::uint32_t + [[nodiscard]] std::uint32_t index() const { return index_; } - boost::filesystem::path const& + [[nodiscard]] boost::filesystem::path const& getDir() const { return dir_; } - std::tuple< - std::shared_ptr, - std::shared_ptr, - std::shared_ptr> - getBackendAll() const; + [[nodiscard]] int + getDesiredAsyncReadCount(); - std::shared_ptr - getBackend() const; + [[nodiscard]] float + getCacheHitRate(); - /** Returns `true` if all shard ledgers have been stored in the backend - */ - bool - isBackendComplete() const; - - std::shared_ptr - pCache() const; - - std::shared_ptr - nCache() const; + [[nodiscard]] std::chrono::steady_clock::time_point + getLastUse() const; /** Returns a pair where the first item describes the storage space utilized and the second item is the number of file descriptors required. 
*/ - std::pair - fileInfo() const; + [[nodiscard]] std::pair + getFileInfo() const; - /** Returns `true` if the shard is complete, validated, and immutable. - */ - bool - isFinal() const; + [[nodiscard]] State + getState() const + { + return state_; + } - /** Returns `true` if the shard is older, without final key data + [[nodiscard]] std::int32_t + getWriteLoad(); + + /** Returns `true` if shard is older, without final key data */ - bool + [[nodiscard]] bool isLegacy() const; /** Finalize shard by walking its ledgers and verifying each Merkle tree. @@ -139,20 +195,13 @@ public: @param referenceHash If present, this hash must match the hash of the last ledger in the shard. */ - bool + [[nodiscard]] bool finalize( bool const writeSQLite, boost::optional const& referenceHash); - void - stop() - { - stop_ = true; - } - - /** If called, the shard directory will be removed when - the shard is destroyed. - */ + /** Enables removal of the shard directory on destruction. + */ void removeOnDestroy() { @@ -168,6 +217,41 @@ public: static uint256 const finalKey; private: + class Count final + { + public: + Count(Count const&) = delete; + Count& + operator=(Count&&) = delete; + Count& + operator=(Count const&) = delete; + + Count(Count&& other) : counter_(other.counter_) + { + other.counter_ = nullptr; + } + + Count(std::atomic* counter) : counter_(counter) + { + if (counter_) + ++(*counter_); + } + + ~Count() + { + if (counter_) + --(*counter_); + } + + operator bool() const + { + return counter_ != nullptr; + } + + private: + std::atomic* counter_; + }; + struct AcquireInfo { // SQLite database to track information about what has been acquired @@ -178,7 +262,8 @@ private: }; Application& app_; - mutable std::recursive_mutex mutex_; + beast::Journal const j_; + mutable std::mutex mutex_; // Shard Index std::uint32_t const index_; @@ -194,10 +279,10 @@ private: std::uint32_t const maxLedgers_; // Database positive cache - std::shared_ptr pCache_; + std::unique_ptr pCache_; // 
Database negative cache - std::shared_ptr nCache_; + std::unique_ptr nCache_; // Path to database files boost::filesystem::path const dir_; @@ -209,7 +294,9 @@ private: std::uint32_t fdRequired_{0}; // NuDB key/value store for node objects - std::shared_ptr backend_; + std::unique_ptr backend_; + + std::atomic backendCount_{0}; // Ledger SQLite database used for indexes std::unique_ptr lgrSQLiteDB_; @@ -221,50 +308,55 @@ private: // If the shard is final, this member will be null. std::unique_ptr acquireInfo_; - beast::Journal const j_; - - // True if backend has stored all ledgers pertaining to the shard - bool backendComplete_{false}; - // Older shard without an acquire database or final key // Eventually there will be no need for this and should be removed bool legacy_{false}; - // True if the backend has a final key stored - bool final_{false}; - // Determines if the shard needs to stop processing for shutdown std::atomic stop_{false}; + std::atomic state_{State::acquire}; + // Determines if the shard directory should be removed in the destructor std::atomic removeOnDestroy_{false}; + // The time of the last access of a shard that has a final state + std::chrono::steady_clock::time_point lastAccess_; + + // Open shard databases + [[nodiscard]] bool + open(std::lock_guard const& lock); + // Open/Create SQLite databases // Lock over mutex_ required - bool - initSQLite(std::lock_guard const& lock); + [[nodiscard]] bool + initSQLite(std::lock_guard const&); // Write SQLite entries for this ledger // Lock over mutex_ required - bool + [[nodiscard]] bool storeSQLite( std::shared_ptr const& ledger, - std::lock_guard const& lock); + std::lock_guard const&); // Set storage and file descriptor usage stats // Lock over mutex_ required void - setFileStats(std::lock_guard const& lock); + setFileStats(std::lock_guard const&); // Validate this ledger by walking its SHAMaps and verifying Merkle trees - bool - valLedger( + [[nodiscard]] bool + verifyLedger( std::shared_ptr 
const& ledger, std::shared_ptr const& next) const; // Fetches from backend and log errors based on status codes - std::shared_ptr - valFetch(uint256 const& hash) const; + [[nodiscard]] std::shared_ptr + verifyFetch(uint256 const& hash) const; + + // Open databases if they are closed + [[nodiscard]] Shard::Count + makeBackendCount(); }; } // namespace NodeStore diff --git a/src/ripple/overlay/impl/PeerImp.cpp b/src/ripple/overlay/impl/PeerImp.cpp index fa514253e..8ed6955f1 100644 --- a/src/ripple/overlay/impl/PeerImp.cpp +++ b/src/ripple/overlay/impl/PeerImp.cpp @@ -2221,21 +2221,22 @@ PeerImp::onMessage(std::shared_ptr const& m) // VFALCO TODO Move this someplace more sensible so we dont // need to inject the NodeStore interfaces. std::uint32_t seq{obj.has_ledgerseq() ? obj.ledgerseq() : 0}; - auto hObj{app_.getNodeStore().fetch(hash, seq)}; - if (!hObj) + auto nodeObject{app_.getNodeStore().fetchNodeObject(hash, seq)}; + if (!nodeObject) { if (auto shardStore = app_.getShardStore()) { if (seq >= shardStore->earliestLedgerSeq()) - hObj = shardStore->fetch(hash, seq); + nodeObject = shardStore->fetchNodeObject(hash, seq); } } - if (hObj) + if (nodeObject) { protocol::TMIndexedObject& newObj = *reply.add_objects(); newObj.set_hash(hash.begin(), hash.size()); newObj.set_data( - &hObj->getData().front(), hObj->getData().size()); + &nodeObject->getData().front(), + nodeObject->getData().size()); if (obj.has_nodeid()) newObj.set_index(obj.nodeid()); diff --git a/src/ripple/rpc/handlers/GetCounts.cpp b/src/ripple/rpc/handlers/GetCounts.cpp index 314ad190a..55c264442 100644 --- a/src/ripple/rpc/handlers/GetCounts.cpp +++ b/src/ripple/rpc/handlers/GetCounts.cpp @@ -120,10 +120,11 @@ getCountsJson(Application& app, int minObjectCount) textTime(uptime, s, "second", 1s); ret[jss::uptime] = uptime; - ret[jss::node_writes] = app.getNodeStore().getStoreCount(); + ret[jss::node_writes] = std::to_string(app.getNodeStore().getStoreCount()); ret[jss::node_reads_total] = 
app.getNodeStore().getFetchTotalCount(); ret[jss::node_reads_hit] = app.getNodeStore().getFetchHitCount(); - ret[jss::node_written_bytes] = app.getNodeStore().getStoreSize(); + ret[jss::node_written_bytes] = + std::to_string(app.getNodeStore().getStoreSize()); ret[jss::node_read_bytes] = app.getNodeStore().getFetchSize(); if (auto shardStore = app.getShardStore()) @@ -137,10 +138,11 @@ getCountsJson(Application& app, int minObjectCount) jv[jss::treenode_track_size] = trackSz; ret[jss::write_load] = shardStore->getWriteLoad(); ret[jss::node_hit_rate] = shardStore->getCacheHitRate(); - jv[jss::node_writes] = shardStore->getStoreCount(); + jv[jss::node_writes] = std::to_string(shardStore->getStoreCount()); jv[jss::node_reads_total] = shardStore->getFetchTotalCount(); jv[jss::node_reads_hit] = shardStore->getFetchHitCount(); - jv[jss::node_written_bytes] = shardStore->getStoreSize(); + jv[jss::node_written_bytes] = + std::to_string(shardStore->getStoreSize()); jv[jss::node_read_bytes] = shardStore->getFetchSize(); } diff --git a/src/ripple/shamap/impl/SHAMap.cpp b/src/ripple/shamap/impl/SHAMap.cpp index 700b63358..19dff57c9 100644 --- a/src/ripple/shamap/impl/SHAMap.cpp +++ b/src/ripple/shamap/impl/SHAMap.cpp @@ -151,12 +151,13 @@ SHAMap::fetchNodeFromDB(SHAMapHash const& hash) const if (backed_) { - if (auto obj = f_.db().fetch(hash.as_uint256(), ledgerSeq_)) + if (auto nodeObject = + f_.db().fetchNodeObject(hash.as_uint256(), ledgerSeq_)) { try { node = SHAMapAbstractNode::makeFromPrefix( - makeSlice(obj->getData()), hash); + makeSlice(nodeObject->getData()), hash); if (node) canonicalize(hash, node); } diff --git a/src/test/nodestore/DatabaseShard_test.cpp b/src/test/nodestore/DatabaseShard_test.cpp index fd59c6687..d760abf4e 100644 --- a/src/test/nodestore/DatabaseShard_test.cpp +++ b/src/test/nodestore/DatabaseShard_test.cpp @@ -272,7 +272,7 @@ class DatabaseShard_test : public TestBase { // Store header { - Serializer s(128); + Serializer s(sizeof(std::uint32_t) 
+ sizeof(LedgerInfo)); s.add32(HashPrefix::ledgerMaster); addRaw(ledger.info(), s); db.store( @@ -369,8 +369,8 @@ class DatabaseShard_test : public TestBase if (!BEAST_EXPECT(nSrc)) return false; - auto nDst = - db.fetch(node.getNodeHash().as_uint256(), ledger.info().seq); + auto nDst = db.fetchNodeObject( + node.getNodeHash().as_uint256(), ledger.info().seq); if (!BEAST_EXPECT(nDst)) return false; @@ -393,8 +393,8 @@ class DatabaseShard_test : public TestBase if (!BEAST_EXPECT(nSrc)) return false; - auto nDst = - db.fetch(node.getNodeHash().as_uint256(), ledger.info().seq); + auto nDst = db.fetchNodeObject( + node.getNodeHash().as_uint256(), ledger.info().seq); if (!BEAST_EXPECT(nDst)) return false; @@ -432,22 +432,13 @@ class DatabaseShard_test : public TestBase std::unique_ptr testConfig( - std::string const& testName, - std::string const& backendType, std::string const& shardDir, std::string const& nodeDir = std::string()) { using namespace test::jtx; - if (testName != "") - { - std::string caseName = - "DatabaseShard " + testName + " with backend " + backendType; - testcase(caseName); - } - return envconfig([&](std::unique_ptr cfg) { - cfg->overwrite(ConfigSection::shardDatabase(), "type", backendType); + // Shard store configuration cfg->overwrite(ConfigSection::shardDatabase(), "path", shardDir); cfg->overwrite( ConfigSection::shardDatabase(), @@ -461,20 +452,16 @@ class DatabaseShard_test : public TestBase ConfigSection::shardDatabase(), "earliest_seq", std::to_string(earliestSeq)); - cfg->overwrite(ConfigSection::nodeDatabase(), "type", backendType); - cfg->overwrite( - ConfigSection::nodeDatabase(), - "max_size_gb", - std::to_string(maxSizeGb)); + + // Node store configuration cfg->overwrite( ConfigSection::nodeDatabase(), "earliest_seq", std::to_string(earliestSeq)); - if (nodeDir.empty()) - cfg->overwrite( - ConfigSection::nodeDatabase(), "path", defNodeDir.path()); - else - cfg->overwrite(ConfigSection::nodeDatabase(), "path", nodeDir); + 
cfg->overwrite( + ConfigSection::nodeDatabase(), + "path", + nodeDir.empty() ? defNodeDir.path() : nodeDir); return cfg; }); } @@ -482,21 +469,21 @@ class DatabaseShard_test : public TestBase std::optional waitShard( DatabaseShard& db, - int shardNumber, + int shardIndex, std::chrono::seconds timeout = shardStoreTimeout) { RangeSet rs; auto start = std::chrono::system_clock::now(); auto end = start + timeout; while (!from_string(rs, db.getCompleteShards()) || - !boost::icl::contains(rs, shardNumber)) + !boost::icl::contains(rs, shardIndex)) { if (!BEAST_EXPECT(std::chrono::system_clock::now() < end)) return {}; std::this_thread::yield(); } - return shardNumber; + return shardIndex; } std::optional @@ -506,16 +493,19 @@ class DatabaseShard_test : public TestBase int maxShardNumber = 1, int ledgerOffset = 0) { - int shardNumber = -1; + int shardIndex{-1}; for (std::uint32_t i = 0; i < ledgersPerShard; ++i) { - auto ind = db.prepareLedger((maxShardNumber + 1) * ledgersPerShard); - if (!BEAST_EXPECT(ind != boost::none)) + auto const ledgerSeq{ + db.prepareLedger((maxShardNumber + 1) * ledgersPerShard)}; + if (!BEAST_EXPECT(ledgerSeq != boost::none)) return {}; - shardNumber = db.seqToShardIndex(*ind); - int arrInd = - *ind - (ledgersPerShard * ledgerOffset) - ledgersPerShard - 1; + + shardIndex = db.seqToShardIndex(*ledgerSeq); + + int const arrInd = *ledgerSeq - (ledgersPerShard * ledgerOffset) - + ledgersPerShard - 1; BEAST_EXPECT( arrInd >= 0 && arrInd < maxShardNumber * ledgersPerShard); BEAST_EXPECT(saveLedger(db, *data.ledgers_[arrInd])); @@ -524,24 +514,27 @@ class DatabaseShard_test : public TestBase uint256 const finalKey_{0}; Serializer s; s.add32(Shard::version); - s.add32(db.firstLedgerSeq(shardNumber)); - s.add32(db.lastLedgerSeq(shardNumber)); + s.add32(db.firstLedgerSeq(shardIndex)); + s.add32(db.lastLedgerSeq(shardIndex)); s.addRaw(data.ledgers_[arrInd]->info().hash.data(), 256 / 8); - db.store(hotUNKNOWN, std::move(s.modData()), finalKey_, *ind); + 
db.store( + hotUNKNOWN, std::move(s.modData()), finalKey_, *ledgerSeq); } db.setStored(data.ledgers_[arrInd]); } - return waitShard(db, shardNumber); + return waitShard(db, shardIndex); } void - testStandalone(std::string const& backendType) + testStandalone() { + testcase("Standalone"); + using namespace test::jtx; beast::temp_dir shardDir; - Env env{*this, testConfig("standalone", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DummyScheduler scheduler; RootStoppable parent("TestRootStoppable"); @@ -563,14 +556,14 @@ class DatabaseShard_test : public TestBase } void - testCreateShard( - std::string const& backendType, - std::uint64_t const seedValue) + testCreateShard(std::uint64_t const seedValue) { + testcase("Create shard"); + using namespace test::jtx; beast::temp_dir shardDir; - Env env{*this, testConfig("createShard", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -586,17 +579,15 @@ class DatabaseShard_test : public TestBase } void - testReopenDatabase( - std::string const& backendType, - std::uint64_t const seedValue) + testReopenDatabase(std::uint64_t const seedValue) { + testcase("Reopen shard store"); + using namespace test::jtx; beast::temp_dir shardDir; { - Env env{ - *this, - testConfig("reopenDatabase", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -609,7 +600,7 @@ class DatabaseShard_test : public TestBase return; } { - Env env{*this, testConfig("", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -626,16 +617,14 @@ class DatabaseShard_test : public TestBase } void - testGetCompleteShards( - std::string const& backendType, - std::uint64_t const seedValue) + testGetCompleteShards(std::uint64_t const seedValue) { + 
testcase("Get complete shards"); + using namespace test::jtx; beast::temp_dir shardDir; - Env env{ - *this, - testConfig("getCompleteShards", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -658,15 +647,14 @@ class DatabaseShard_test : public TestBase } void - testPrepareShard( - std::string const& backendType, - std::uint64_t const seedValue) + testPrepareShard(std::uint64_t const seedValue) { + testcase("Prepare shard"); + using namespace test::jtx; beast::temp_dir shardDir; - Env env{ - *this, testConfig("prepareShard", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -727,19 +715,17 @@ class DatabaseShard_test : public TestBase } void - testImportShard( - std::string const& backendType, - std::uint64_t const seedValue) + testImportShard(std::uint64_t const seedValue) { + testcase("Import shard"); + using namespace test::jtx; beast::temp_dir importDir; TestData data(seedValue, 2); { - Env env{ - *this, - testConfig("importShard", backendType, importDir.path())}; + Env env{*this, testConfig(importDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -760,7 +746,7 @@ class DatabaseShard_test : public TestBase { beast::temp_dir shardDir; - Env env{*this, testConfig("", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -769,8 +755,14 @@ class DatabaseShard_test : public TestBase db->prepareShard(1); BEAST_EXPECT(db->getPreShards() == bitmask2Rangeset(2)); + + using namespace boost::filesystem; + remove_all(importPath / LgrDBName); + remove_all(importPath / TxDBName); + if (!BEAST_EXPECT(db->importShard(1, importPath))) return; + BEAST_EXPECT(db->getPreShards() == ""); auto n = waitShard(*db, 1); @@ -783,20 +775,17 @@ class DatabaseShard_test : public 
TestBase } void - testCorruptedDatabase( - std::string const& backendType, - std::uint64_t const seedValue) + testCorruptedDatabase(std::uint64_t const seedValue) { + testcase("Corrupted shard store"); + using namespace test::jtx; beast::temp_dir shardDir; { TestData data(seedValue, 4, 2); { - Env env{ - *this, - testConfig( - "corruptedDatabase", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -810,7 +799,7 @@ class DatabaseShard_test : public TestBase boost::filesystem::path path = shardDir.path(); path /= std::string("2"); - path /= backendType + ".dat"; + path /= "nudb.dat"; FILE* f = fopen(path.string().c_str(), "r+b"); if (!BEAST_EXPECT(f)) @@ -820,42 +809,36 @@ class DatabaseShard_test : public TestBase BEAST_EXPECT(fwrite(buf, 1, 256, f) == 256); fclose(f); } - { - Env env{*this, testConfig("", backendType, shardDir.path())}; - DatabaseShard* db = env.app().getShardStore(); - BEAST_EXPECT(db); - TestData data(seedValue, 4, 2); - if (!BEAST_EXPECT(data.makeLedgers(env))) - return; + Env env{*this, testConfig(shardDir.path())}; + DatabaseShard* db = env.app().getShardStore(); + BEAST_EXPECT(db); - for (std::uint32_t i = 1; i <= 1; ++i) - waitShard(*db, i); + TestData data(seedValue, 4, 2); + if (!BEAST_EXPECT(data.makeLedgers(env))) + return; - BEAST_EXPECT(db->getCompleteShards() == bitmask2Rangeset(0x2)); + for (std::uint32_t i = 1; i <= 1; ++i) + waitShard(*db, i); - for (std::uint32_t i = 0; i < 1 * ledgersPerShard; ++i) - checkLedger(data, *db, *data.ledgers_[i]); - } + BEAST_EXPECT(db->getCompleteShards() == bitmask2Rangeset(0x2)); + + for (std::uint32_t i = 0; i < 1 * ledgersPerShard; ++i) + checkLedger(data, *db, *data.ledgers_[i]); } void - testIllegalFinalKey( - std::string const& backendType, - std::uint64_t const seedValue) + testIllegalFinalKey(std::uint64_t const seedValue) { + testcase("Illegal finalKey"); + using namespace test::jtx; for (int i = 
0; i < 5; ++i) { beast::temp_dir shardDir; { - Env env{ - *this, - testConfig( - (i == 0 ? "illegalFinalKey" : ""), - backendType, - shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -863,14 +846,16 @@ class DatabaseShard_test : public TestBase if (!BEAST_EXPECT(data.makeLedgers(env))) return; - int shardNumber = -1; + int shardIndex{-1}; for (std::uint32_t j = 0; j < ledgersPerShard; ++j) { - auto ind = db->prepareLedger(2 * ledgersPerShard); - if (!BEAST_EXPECT(ind != boost::none)) + auto const ledgerSeq{ + db->prepareLedger(2 * ledgersPerShard)}; + if (!BEAST_EXPECT(ledgerSeq != boost::none)) return; - shardNumber = db->seqToShardIndex(*ind); - int arrInd = *ind - ledgersPerShard - 1; + + shardIndex = db->seqToShardIndex(*ledgerSeq); + int arrInd = *ledgerSeq - ledgersPerShard - 1; BEAST_EXPECT(arrInd >= 0 && arrInd < ledgersPerShard); BEAST_EXPECT(saveLedger(*db, *data.ledgers_[arrInd])); if (arrInd % ledgersPerShard == (ledgersPerShard - 1)) @@ -878,8 +863,8 @@ class DatabaseShard_test : public TestBase uint256 const finalKey_{0}; Serializer s; s.add32(Shard::version + (i == 0)); - s.add32(db->firstLedgerSeq(shardNumber) + (i == 1)); - s.add32(db->lastLedgerSeq(shardNumber) - (i == 3)); + s.add32(db->firstLedgerSeq(shardIndex) + (i == 1)); + s.add32(db->lastLedgerSeq(shardIndex) - (i == 3)); s.addRaw( data.ledgers_[arrInd - (i == 4)] ->info() @@ -889,13 +874,13 @@ class DatabaseShard_test : public TestBase hotUNKNOWN, std::move(s.modData()), finalKey_, - *ind); + *ledgerSeq); } db->setStored(data.ledgers_[arrInd]); } if (i == 2) - waitShard(*db, shardNumber); + waitShard(*db, shardIndex); else { boost::filesystem::path path(shardDir.path()); @@ -916,7 +901,7 @@ class DatabaseShard_test : public TestBase } { - Env env{*this, testConfig("", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); 
BEAST_EXPECT(db); @@ -941,17 +926,16 @@ class DatabaseShard_test : public TestBase } void - testImport(std::string const& backendType, std::uint64_t const seedValue) + testImport(std::uint64_t const seedValue) { + testcase("Import node store"); + using namespace test::jtx; beast::temp_dir shardDir; { beast::temp_dir nodeDir; - Env env{ - *this, - testConfig( - "import", backendType, shardDir.path(), nodeDir.path())}; + Env env{*this, testConfig(shardDir.path(), nodeDir.path())}; DatabaseShard* db = env.app().getShardStore(); Database& ndb = env.app().getNodeStore(); BEAST_EXPECT(db); @@ -970,7 +954,7 @@ class DatabaseShard_test : public TestBase BEAST_EXPECT(db->getCompleteShards() == bitmask2Rangeset(0x6)); } { - Env env{*this, testConfig("", backendType, shardDir.path())}; + Env env{*this, testConfig(shardDir.path())}; DatabaseShard* db = env.app().getShardStore(); BEAST_EXPECT(db); @@ -989,10 +973,10 @@ class DatabaseShard_test : public TestBase } void - testImportWithHistoricalPaths( - std::string const& backendType, - std::uint64_t const seedValue) + testImportWithHistoricalPaths(std::uint64_t const seedValue) { + testcase("Import with historical paths"); + using namespace test::jtx; // Test importing with multiple historical @@ -1009,11 +993,7 @@ class DatabaseShard_test : public TestBase [](const beast::temp_dir& dir) { return dir.path(); }); beast::temp_dir nodeDir; - auto c = testConfig( - "importWithHistoricalPaths", - backendType, - shardDir.path(), - nodeDir.path()); + auto c = testConfig(shardDir.path(), nodeDir.path()); auto& historyPaths = c->section(SECTION_HISTORICAL_SHARD_PATHS); historyPaths.append( @@ -1075,11 +1055,7 @@ class DatabaseShard_test : public TestBase beast::temp_dir historicalDir; beast::temp_dir nodeDir; - auto c = testConfig( - "importWithSingleHistoricalPath", - backendType, - shardDir.path(), - nodeDir.path()); + auto c = testConfig(shardDir.path(), nodeDir.path()); auto& historyPaths = 
c->section(SECTION_HISTORICAL_SHARD_PATHS); historyPaths.append({historicalDir.path()}); @@ -1125,10 +1101,10 @@ class DatabaseShard_test : public TestBase } void - testPrepareWithHistoricalPaths( - std::string const& backendType, - std::uint64_t const seedValue) + testPrepareWithHistoricalPaths(std::uint64_t const seedValue) { + testcase("Prepare with historical paths"); + using namespace test::jtx; // Test importing with multiple historical @@ -1145,8 +1121,7 @@ class DatabaseShard_test : public TestBase [](const beast::temp_dir& dir) { return dir.path(); }); beast::temp_dir nodeDir; - auto c = testConfig( - "prepareWithHistoricalPaths", backendType, shardDir.path()); + auto c = testConfig(shardDir.path()); auto& historyPaths = c->section(SECTION_HISTORICAL_SHARD_PATHS); historyPaths.append( @@ -1300,20 +1275,53 @@ class DatabaseShard_test : public TestBase } void - testAll(std::string const& backendType) + testOpenShardManagement(std::uint64_t const seedValue) { - std::uint64_t const seedValue = 51; - testStandalone(backendType); - testCreateShard(backendType, seedValue); - testReopenDatabase(backendType, seedValue + 5); - testGetCompleteShards(backendType, seedValue + 10); - testPrepareShard(backendType, seedValue + 20); - testImportShard(backendType, seedValue + 30); - testCorruptedDatabase(backendType, seedValue + 40); - testIllegalFinalKey(backendType, seedValue + 50); - testImport(backendType, seedValue + 60); - testImportWithHistoricalPaths(backendType, seedValue + 80); - testPrepareWithHistoricalPaths(backendType, seedValue + 90); + testcase("Open shard management"); + + using namespace test::jtx; + + beast::temp_dir shardDir; + Env env{*this, testConfig(shardDir.path())}; + + auto shardStore{env.app().getShardStore()}; + BEAST_EXPECT(shardStore); + + // Create one shard more than the open final limit + auto const openFinalLimit{env.app().config().getValueFor( + SizedItem::openFinalLimit, boost::none)}; + auto const numShards{openFinalLimit + 1}; + + 
TestData data(seedValue, 2, numShards); + if (!BEAST_EXPECT(data.makeLedgers(env))) + return; + + BEAST_EXPECT(shardStore->getCompleteShards().empty()); + + int oldestShardIndex{-1}; + std::uint64_t bitMask{0}; + for (auto i = 0; i < numShards; ++i) + { + auto shardIndex{createShard(data, *shardStore, numShards)}; + if (!BEAST_EXPECT( + shardIndex && *shardIndex >= 1 && *shardIndex <= numShards)) + return; + + bitMask |= (1ll << *shardIndex); + + if (oldestShardIndex == -1) + oldestShardIndex = *shardIndex; + } + + // The number of open shards exceeds the open limit by one. + // A sweep will close enough shards to be within the limit. + shardStore->sweep(); + + // Read from the closed shard and automatically open it + auto const ledgerSeq{shardStore->lastLedgerSeq(oldestShardIndex)}; + auto const index{ledgerSeq - ledgersPerShard - 1}; + BEAST_EXPECT(shardStore->fetchNodeObject( + data.ledgers_[index]->info().hash, ledgerSeq)); } public: @@ -1324,19 +1332,24 @@ public: void run() override { - testAll("nudb"); + std::uint64_t const seedValue = 51; -#if RIPPLE_ROCKSDB_AVAILABLE -// testAll ("rocksdb"); -#endif - -#if RIPPLE_ENABLE_SQLITE_BACKEND_TESTS - testAll("sqlite"); -#endif + testStandalone(); + testCreateShard(seedValue); + testReopenDatabase(seedValue + 10); + testGetCompleteShards(seedValue + 20); + testPrepareShard(seedValue + 30); + testImportShard(seedValue + 40); + testCorruptedDatabase(seedValue + 50); + testIllegalFinalKey(seedValue + 60); + testImport(seedValue + 70); + testImportWithHistoricalPaths(seedValue + 80); + testPrepareWithHistoricalPaths(seedValue + 90); + testOpenShardManagement(seedValue + 100); } }; -BEAST_DEFINE_TESTSUITE(DatabaseShard, NodeStore, ripple); +BEAST_DEFINE_TESTSUITE_MANUAL(DatabaseShard, NodeStore, ripple); } // namespace NodeStore } // namespace ripple diff --git a/src/test/nodestore/TestBase.h b/src/test/nodestore/TestBase.h index 650d11b51..9ccf2e169 100644 --- a/src/test/nodestore/TestBase.h +++ 
b/src/test/nodestore/TestBase.h @@ -217,7 +217,7 @@ public: for (int i = 0; i < batch.size(); ++i) { std::shared_ptr object = - db.fetch(batch[i]->getHash(), 0); + db.fetchNodeObject(batch[i]->getHash(), 0); if (object != nullptr) pCopy->push_back(object);