From f634666dc6e8bd7198c4cd79f94841a688936681 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 5 Jan 2015 13:37:58 -0800 Subject: [PATCH] Make rocksdbquick settings default: This removes the old default configuration for the "rocksdb" backend and replaces it with the configuration that was formerly available using the experimental backend "rocksdbquick". The new configuration setting improves the performance of the key/value database by changing the compaction style and tuning the size parameters for the typical rippled workload. Testing shows a decrease in I/O spikes for both reading and writing. --- Builds/VisualStudio2013/RippleD.vcxproj | 3 - .../VisualStudio2013/RippleD.vcxproj.filters | 3 - .../nodestore/backend/RocksDBFactory.cpp | 150 +++---- .../nodestore/backend/RocksDBQuickFactory.cpp | 374 ------------------ src/ripple/nodestore/tests/Backend.test.cpp | 10 +- src/ripple/nodestore/tests/Database.test.cpp | 8 +- src/ripple/unity/nodestore.cpp | 1 - 7 files changed, 70 insertions(+), 479 deletions(-) delete mode 100644 src/ripple/nodestore/backend/RocksDBQuickFactory.cpp diff --git a/Builds/VisualStudio2013/RippleD.vcxproj b/Builds/VisualStudio2013/RippleD.vcxproj index 93d6463305..f861950d60 100644 --- a/Builds/VisualStudio2013/RippleD.vcxproj +++ b/Builds/VisualStudio2013/RippleD.vcxproj @@ -2379,9 +2379,6 @@ True - - True - diff --git a/Builds/VisualStudio2013/RippleD.vcxproj.filters b/Builds/VisualStudio2013/RippleD.vcxproj.filters index 166ab1bc80..1ae4ffc22f 100644 --- a/Builds/VisualStudio2013/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2013/RippleD.vcxproj.filters @@ -3378,9 +3378,6 @@ ripple\nodestore\backend - - ripple\nodestore\backend - ripple\nodestore diff --git a/src/ripple/nodestore/backend/RocksDBFactory.cpp b/src/ripple/nodestore/backend/RocksDBFactory.cpp index ab4b04bd19..e72a30edc1 100644 --- a/src/ripple/nodestore/backend/RocksDBFactory.cpp +++ b/src/ripple/nodestore/backend/RocksDBFactory.cpp @@ -26,7 +26,6 @@ #include // VFALCO Bad dependency #include #include -#include #include #include #include @@ -86,7 +85,6 @@ public: class RocksDBBackend : public Backend - , public BatchWriter::Callback { private: std::atomic m_deletePath; @@ -94,8 +92,6 @@ private: public: beast::Journal m_journal; size_t const m_keyBytes; - Scheduler& m_scheduler; - BatchWriter m_batch; std::string m_name; std::unique_ptr m_db; @@ -104,98 +100,71 @@ public: : m_deletePath (false) , m_journal (journal) , m_keyBytes (keyBytes) - , m_scheduler (scheduler) - , m_batch (*this, scheduler) , m_name (keyValues ["path"].toStdString ()) { if (m_name.empty()) throw std::runtime_error ("Missing path in RocksDBFactory backend"); + // Defaults + std::uint64_t budget = 512 * 1024 * 1024; // 512MB + std::string style("level"); + std::uint64_t threads=4; + + if (!keyValues["budget"].isEmpty()) + budget = keyValues["budget"].getIntValue(); + + if (!keyValues["style"].isEmpty()) + style = keyValues["style"].toStdString(); + + if (!keyValues["threads"].isEmpty()) + threads = keyValues["threads"].getIntValue(); + + + // Set options rocksdb::Options options; - rocksdb::BlockBasedTableOptions table_options; options.create_if_missing = true; options.env = env; - if (keyValues["cache_mb"].isEmpty()) - { - table_options.block_cache = rocksdb::NewLRUCache (getConfig ().getSize (siHashNodeDBCache) * 1024 * 1024); - } - else - { - table_options.block_cache = rocksdb::NewLRUCache (keyValues["cache_mb"].getIntValue() * 1024L * 1024L); - } + if (style == "level") + options.OptimizeLevelStyleCompaction(budget); - if (keyValues["filter_bits"].isEmpty()) - { - if (getConfig ().NODE_SIZE >= 2) - table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (10)); - } - else if (keyValues["filter_bits"].getIntValue() != 0) - { - table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (keyValues["filter_bits"].getIntValue())); - } + if (style == "universal") + options.OptimizeUniversalStyleCompaction(budget); - if (! keyValues["open_files"].isEmpty()) - { - options.max_open_files = keyValues["open_files"].getIntValue(); - } + if (style == "point") + options.OptimizeForPointLookup(budget / 1024 / 1024); // In MB - if (! keyValues["file_size_mb"].isEmpty()) - { - options.target_file_size_base = 1024 * 1024 * keyValues["file_size_mb"].getIntValue(); - options.max_bytes_for_level_base = 5 * options.target_file_size_base; - options.write_buffer_size = 2 * options.target_file_size_base; - } + options.IncreaseParallelism(threads); - if (! keyValues["file_size_mult"].isEmpty()) - { - options.target_file_size_multiplier = keyValues["file_size_mult"].getIntValue(); - } + // Allows hash indexes in blocks + options.prefix_extractor.reset(rocksdb::NewNoopTransform()); - if (! keyValues["bg_threads"].isEmpty()) - { - options.env->SetBackgroundThreads - (keyValues["bg_threads"].getIntValue(), rocksdb::Env::LOW); - } + // overrride OptimizeLevelStyleCompaction + options.min_write_buffer_number_to_merge = 1; + + rocksdb::BlockBasedTableOptions table_options; + // Use hash index + table_options.index_type = + rocksdb::BlockBasedTableOptions::kHashSearch; + table_options.filter_policy.reset( + rocksdb::NewBloomFilterPolicy(10)); + options.table_factory.reset( + NewBlockBasedTableFactory(table_options)); + + // Higher values make reads slower + // table_options.block_size = 4096; - if (! keyValues["high_threads"].isEmpty()) - { - auto const highThreads = keyValues["high_threads"].getIntValue(); - options.env->SetBackgroundThreads (highThreads, rocksdb::Env::HIGH); + // No point when DatabaseImp has a cache + // table_options.block_cache = + // rocksdb::NewLRUCache(64 * 1024 * 1024); - // If we have high-priority threads, presumably we want to - // use them for background flushes - if (highThreads > 0) - options.max_background_flushes = highThreads; - } - - if (! keyValues["compression"].isEmpty ()) - { - if (keyValues["compression"].getIntValue () == 0) - { - options.compression = rocksdb::kNoCompression; - } - } - - if (! keyValues["block_size"].isEmpty ()) - { - table_options.block_size = keyValues["block_size"].getIntValue (); - } - - if (! keyValues["universal_compaction"].isEmpty ()) - { - if (keyValues["universal_compaction"].getIntValue () != 0) - { - options.compaction_style = rocksdb:: kCompactionStyleUniversal; - options.min_write_buffer_number_to_merge = 2; - options.max_write_buffer_number = 6; - options.write_buffer_size = 6 * options.target_file_size_base; - } - } - - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory()); + // Alternative: + // options.memtable_factory.reset( + // rocksdb::NewHashCuckooRepFactory(options.write_buffer_size)); rocksdb::DB* db = nullptr; + rocksdb::Status status = rocksdb::DB::Open (options, m_name, &db); if (!status.ok () || !db) throw std::runtime_error (std::string("Unable to open/create RocksDB: ") + status.ToString()); @@ -274,29 +243,32 @@ public: void store (NodeObject::ref object) { - m_batch.store (object); + storeBatch(Batch{object}); } void storeBatch (Batch const& batch) { rocksdb::WriteBatch wb; - + EncodedBlob encoded; for (auto const& e : batch) { encoded.prepare (e); - wb.Put ( - rocksdb::Slice (reinterpret_cast ( - encoded.getKey ()), m_keyBytes), - rocksdb::Slice (reinterpret_cast ( - encoded.getData ()), encoded.getSize ())); + wb.Put( + rocksdb::Slice(reinterpret_cast(encoded.getKey()), + m_keyBytes), + rocksdb::Slice(reinterpret_cast(encoded.getData()), + encoded.getSize())); } - rocksdb::WriteOptions const options; + rocksdb::WriteOptions options; + // Crucial to ensure good write speed and non-blocking writes to memtable + options.disableWAL = true; + auto ret = m_db->Write (options, &wb); if (!ret.ok ()) @@ -342,7 +314,7 @@ public: int getWriteLoad () { - return m_batch.getWriteLoad (); + return 0; } void @@ -367,12 +339,12 @@ class RocksDBFactory : public Factory public: RocksDBEnv m_env; - RocksDBFactory () + RocksDBFactory() { Manager::instance().insert(*this); } - ~RocksDBFactory () + ~RocksDBFactory() { Manager::instance().erase(*this); } diff --git a/src/ripple/nodestore/backend/RocksDBQuickFactory.cpp b/src/ripple/nodestore/backend/RocksDBQuickFactory.cpp deleted file mode 100644 index 559fd7da23..0000000000 --- a/src/ripple/nodestore/backend/RocksDBQuickFactory.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//------------------------------------------------------------------------------ -/* - This file is part of rippled: https://github.com/ripple/rippled - Copyright (c) 2012, 2013 Ripple Labs Inc. - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ -//============================================================================== - -#include - -#include - -#if RIPPLE_ROCKSDB_AVAILABLE - -#include // VFALCO Bad dependency -#include -#include -#include -#include -#include -#include -#include // - -namespace ripple { -namespace NodeStore { - -class RockDBQuickEnv : public rocksdb::EnvWrapper -{ -public: - RockDBQuickEnv () - : EnvWrapper (rocksdb::Env::Default()) - { - } - - struct ThreadParams - { - ThreadParams (void (*f_)(void*), void* a_) - : f (f_) - , a (a_) - { - } - - void (*f)(void*); - void* a; - }; - - static - void - thread_entry (void* ptr) - { - ThreadParams* const p (reinterpret_cast (ptr)); - void (*f)(void*) = p->f; - void* a (p->a); - delete p; - - static std::atomic n; - std::size_t const id (++n); - std::stringstream ss; - ss << "rocksdb #" << id; - beast::Thread::setCurrentThreadName (ss.str()); - - (*f)(a); - } - - void - StartThread (void (*f)(void*), void* a) - { - ThreadParams* const p (new ThreadParams (f, a)); - EnvWrapper::StartThread (&RockDBQuickEnv::thread_entry, p); - } -}; - -//------------------------------------------------------------------------------ - -class RocksDBQuickBackend - : public Backend -{ -private: - std::atomic m_deletePath; - -public: - beast::Journal m_journal; - size_t const m_keyBytes; - std::string m_name; - std::unique_ptr m_db; - - RocksDBQuickBackend (int keyBytes, Parameters const& keyValues, - Scheduler& scheduler, beast::Journal journal, RockDBQuickEnv* env) - : m_journal (journal) - , m_keyBytes (keyBytes) - , m_name (keyValues ["path"].toStdString ()) - { - if (m_name.empty()) - throw std::runtime_error ("Missing path in RocksDBFactory backend"); - - // Defaults - std::uint64_t budget = 512 * 1024 * 1024; // 512MB - std::string style("level"); - std::uint64_t threads=4; - - if (!keyValues["budget"].isEmpty()) - budget = keyValues["budget"].getIntValue(); - - if (!keyValues["style"].isEmpty()) - style = keyValues["style"].toStdString(); - - if (!keyValues["threads"].isEmpty()) - threads = keyValues["threads"].getIntValue(); - - - // Set options - rocksdb::Options options; - options.create_if_missing = true; - options.env = env; - - if (style == "level") - options.OptimizeLevelStyleCompaction(budget); - - if (style == "universal") - options.OptimizeUniversalStyleCompaction(budget); - - if (style == "point") - options.OptimizeForPointLookup(budget / 1024 / 1024); // In MB - - options.IncreaseParallelism(threads); - - // Allows hash indexes in blocks - options.prefix_extractor.reset(rocksdb::NewNoopTransform()); - - // overrride OptimizeLevelStyleCompaction - options.min_write_buffer_number_to_merge = 1; - - rocksdb::BlockBasedTableOptions table_options; - // Use hash index - table_options.index_type = - rocksdb::BlockBasedTableOptions::kHashSearch; - table_options.filter_policy.reset( - rocksdb::NewBloomFilterPolicy(10)); - options.table_factory.reset( - NewBlockBasedTableFactory(table_options)); - - // Higher values make reads slower - // table_options.block_size = 4096; - - // No point when DatabaseImp has a cache - // table_options.block_cache = - // rocksdb::NewLRUCache(64 * 1024 * 1024); - - options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory()); - // Alternative: - // options.memtable_factory.reset( - // rocksdb::NewHashCuckooRepFactory(options.write_buffer_size)); - - rocksdb::DB* db = nullptr; - - rocksdb::Status status = rocksdb::DB::Open (options, m_name, &db); - if (!status.ok () || !db) - throw std::runtime_error (std::string("Unable to open/create RocksDB: ") + status.ToString()); - - m_db.reset (db); - } - - ~RocksDBQuickBackend () - { - if (m_deletePath) - { - m_db.reset(); - boost::filesystem::path dir = m_name; - boost::filesystem::remove_all (dir); - } - } - - std::string - getName() - { - return m_name; - } - - //-------------------------------------------------------------------------- - - Status - fetch (void const* key, NodeObject::Ptr* pObject) - { - pObject->reset (); - - Status status (ok); - - rocksdb::ReadOptions const options; - rocksdb::Slice const slice (static_cast (key), m_keyBytes); - - std::string string; - - rocksdb::Status getStatus = m_db->Get (options, slice, &string); - - if (getStatus.ok ()) - { - DecodedBlob decoded (key, string.data (), string.size ()); - - if (decoded.wasOk ()) - { - *pObject = decoded.createObject (); - } - else - { - // Decoding failed, probably corrupted! - // - status = dataCorrupt; - } - } - else - { - if (getStatus.IsCorruption ()) - { - status = dataCorrupt; - } - else if (getStatus.IsNotFound ()) - { - status = notFound; - } - else - { - status = Status (customCode + getStatus.code()); - - m_journal.error << getStatus.ToString (); - } - } - - return status; - } - - void - store (NodeObject::ref object) - { - storeBatch(Batch{object}); - } - - void - storeBatch (Batch const& batch) - { - rocksdb::WriteBatch wb; - - EncodedBlob encoded; - - for (auto const& e : batch) - { - encoded.prepare (e); - - wb.Put( - rocksdb::Slice(reinterpret_cast(encoded.getKey()), - m_keyBytes), - rocksdb::Slice(reinterpret_cast(encoded.getData()), - encoded.getSize())); - } - - rocksdb::WriteOptions options; - - // Crucial to ensure good write speed and non-blocking writes to memtable - options.disableWAL = true; - - auto ret = m_db->Write (options, &wb); - - if (!ret.ok ()) - throw std::runtime_error ("storeBatch failed: " + ret.ToString()); - } - - void - for_each (std::function f) - { - rocksdb::ReadOptions const options; - - std::unique_ptr it (m_db->NewIterator (options)); - - for (it->SeekToFirst (); it->Valid (); it->Next ()) - { - if (it->key ().size () == m_keyBytes) - { - DecodedBlob decoded (it->key ().data (), - it->value ().data (), - it->value ().size ()); - - if (decoded.wasOk ()) - { - f (decoded.createObject ()); - } - else - { - // Uh oh, corrupted data! - if (m_journal.fatal) m_journal.fatal << - "Corrupt NodeObject #" << uint256 (it->key ().data ()); - } - } - else - { - // VFALCO NOTE What does it mean to find an - // incorrectly sized key? Corruption? - if (m_journal.fatal) m_journal.fatal << - "Bad key size = " << it->key ().size (); - } - } - } - - int - getWriteLoad () - { - return 0; - } - - void - setDeletePath() override - { - m_deletePath = true; - } - - //-------------------------------------------------------------------------- - - void - writeBatch (Batch const& batch) - { - storeBatch (batch); - } -}; - -//------------------------------------------------------------------------------ - -class RocksDBQuickFactory : public Factory -{ -public: - RockDBQuickEnv m_env; - - RocksDBQuickFactory() - { - Manager::instance().insert(*this); - } - - ~RocksDBQuickFactory() - { - Manager::instance().erase(*this); - } - - std::string - getName () const - { - return "RocksDBQuick"; - } - - std::unique_ptr - createInstance ( - size_t keyBytes, - Parameters const& keyValues, - Scheduler& scheduler, - beast::Journal journal) - { - return std::make_unique ( - keyBytes, keyValues, scheduler, journal, &m_env); - } -}; - -static RocksDBQuickFactory rocksDBQuickFactory; - -} -} - -#endif diff --git a/src/ripple/nodestore/tests/Backend.test.cpp b/src/ripple/nodestore/tests/Backend.test.cpp index 3348fb43e5..692599104e 100644 --- a/src/ripple/nodestore/tests/Backend.test.cpp +++ b/src/ripple/nodestore/tests/Backend.test.cpp @@ -96,17 +96,17 @@ public: testBackend ("leveldb", seedValue); - #ifdef RIPPLE_ENABLE_SQLITE_BACKEND_TESTS - testBackend ("sqlite", seedValue); - #endif - #if RIPPLE_HYPERLEVELDB_AVAILABLE testBackend ("hyperleveldb", seedValue); #endif - #if RIPPLE_ROCKSDB_AVAILABLE +#if RIPPLE_ROCKSDB_AVAILABLE testBackend ("rocksdb", seedValue); #endif + + #ifdef RIPPLE_ENABLE_SQLITE_BACKEND_TESTS + testBackend ("sqlite", seedValue); + #endif } }; diff --git a/src/ripple/nodestore/tests/Database.test.cpp b/src/ripple/nodestore/tests/Database.test.cpp index 0ee219136c..bb7e94b72a 100644 --- a/src/ripple/nodestore/tests/Database.test.cpp +++ b/src/ripple/nodestore/tests/Database.test.cpp @@ -203,14 +203,14 @@ public: { testImport ("leveldb", "leveldb", seedValue); - #if RIPPLE_ROCKSDB_AVAILABLE - testImport ("rocksdb", "rocksdb", seedValue); - #endif - #if RIPPLE_HYPERLEVELDB_AVAILABLE testImport ("hyperleveldb", "hyperleveldb", seedValue); #endif + #if RIPPLE_ROCKSDB_AVAILABLE + testImport ("rocksdb", "rocksdb", seedValue); + #endif + #if RIPPLE_ENABLE_SQLITE_BACKEND_TESTS testImport ("sqlite", "sqlite", seedValue); #endif diff --git a/src/ripple/unity/nodestore.cpp b/src/ripple/unity/nodestore.cpp index da0bd13a5b..dca1b71d97 100644 --- a/src/ripple/unity/nodestore.cpp +++ b/src/ripple/unity/nodestore.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include