From d5c3f0c9cf35c2ad01794f426dce03f8b80fb2c3 Mon Sep 17 00:00:00 2001 From: Mark Travis Date: Sun, 30 Nov 2014 00:04:55 -0800 Subject: [PATCH] Stability bugfixes for online delete SHAMapStore: The correct ledger age is necessary for checking health status, and the previous behavior caused the online deletion process to abort if the process took too long. The tuning parameter added and the parameter whose default was modified both minimize the impact of SQL DELETE operations, by decreasing the default batch size for deletes and by increasing the backoff period between deletion batches. These parameters decrease contention for SQLite and I/O, with the trade-off of longer processing time for online delete. Online delete is not a time-critical function, so a little slowness in wall-clock time is not harmful. --- src/ripple/app/misc/SHAMapStore.h | 4 +++- src/ripple/app/misc/SHAMapStoreImp.cpp | 17 +++++++++-------- src/ripple/app/misc/SHAMapStoreImp.h | 4 ---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/ripple/app/misc/SHAMapStore.h b/src/ripple/app/misc/SHAMapStore.h index 4a1682449..9786bc544 100644 --- a/src/ripple/app/misc/SHAMapStore.h +++ b/src/ripple/app/misc/SHAMapStore.h @@ -43,7 +43,9 @@ public: beast::StringPairArray nodeDatabase; beast::StringPairArray ephemeralNodeDatabase; std::string databasePath; - std::uint32_t deleteBatch = 1000; + std::uint32_t deleteBatch = 100; + std::uint32_t backOff = 100; + std::int32_t ageThreshold = 60; }; SHAMapStore (Stoppable& parent) : Stoppable ("SHAMapStore", parent) {} diff --git a/src/ripple/app/misc/SHAMapStoreImp.cpp b/src/ripple/app/misc/SHAMapStoreImp.cpp index fa98fb841..2232103b1 100644 --- a/src/ripple/app/misc/SHAMapStoreImp.cpp +++ b/src/ripple/app/misc/SHAMapStoreImp.cpp @@ -563,7 +563,7 @@ SHAMapStoreImp::clearSql (DatabaseCon& database, return; if (min < lastRotated) std::this_thread::sleep_for ( - std::chrono::microseconds (pause_)); + std::chrono::milliseconds (setup_.backOff)); } 
journal_.debug << "finished: " << deleteQuery; } @@ -634,15 +634,12 @@ SHAMapStoreImp::health() return Health::ok; NetworkOPs::OperatingMode mode = netOPs_->getOperatingMode(); - std::uint32_t age = netOPs_->getNetworkTimeNC() - ( - validatedLedger_->getCloseTimeNC() - - validatedLedger_->getCloseResolution()); - if (mode != NetworkOPs::omFULL || age >= ageTooHigh_) + std::int32_t age = ledgerMaster_->getValidatedLedgerAge(); + if (mode != NetworkOPs::omFULL || age >= setup_.ageThreshold) { - journal_.warning << "server not healthy, not deleting. state: " - << mode << " age " << age << " age threshold " - << ageTooHigh_; + journal_.warning << "Not deleting. state: " << mode << " age " << age + << " age threshold " << setup_.ageThreshold; healthy_ = false; } @@ -702,6 +699,10 @@ setup_SHAMapStore (Config const& c) setup.databasePath = c.DATABASE_PATH; if (c.nodeDatabase["delete_batch"].isNotEmpty()) setup.deleteBatch = c.nodeDatabase["delete_batch"].getIntValue(); + if (c.nodeDatabase["backOff"].isNotEmpty()) + setup.backOff = c.nodeDatabase["backOff"].getIntValue(); + if (c.nodeDatabase["age_threshold"].isNotEmpty()) + setup.ageThreshold = c.nodeDatabase["age_threshold"].getIntValue(); return setup; } diff --git a/src/ripple/app/misc/SHAMapStoreImp.h b/src/ripple/app/misc/SHAMapStoreImp.h index c6d73a41e..ee704c1d2 100644 --- a/src/ripple/app/misc/SHAMapStoreImp.h +++ b/src/ripple/app/misc/SHAMapStoreImp.h @@ -77,10 +77,6 @@ private: std::string const dbPrefix_ = "rippledb"; // check health/stop status as records are copied std::uint64_t const checkHealthInterval_ = 1000; - // microseconds to back off between sqlite deletion batches - std::uint32_t pause_ = 1000; - // seconds to compare against ledger age - std::uint16_t ageTooHigh_ = 60; // minimum # of ledgers to maintain for health of network std::uint32_t minimumDeletionInterval_ = 256;