rippled
Loading...
Searching...
No Matches
RocksDBFactory.cpp
1//------------------------------------------------------------------------------
2/*
3 This file is part of rippled: https://github.com/ripple/rippled
4 Copyright (c) 2012, 2013 Ripple Labs Inc.
5
6 Permission to use, copy, modify, and/or distribute this software for any
7 purpose with or without fee is hereby granted, provided that the above
8 copyright notice and this permission notice appear in all copies.
9
10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*/
18//==============================================================================
19
20#include <xrpld/unity/rocksdb.h>
21
22#if RIPPLE_ROCKSDB_AVAILABLE
23#include <xrpld/core/Config.h> // VFALCO Bad dependency
24#include <xrpld/nodestore/Factory.h>
25#include <xrpld/nodestore/Manager.h>
26#include <xrpld/nodestore/detail/BatchWriter.h>
27#include <xrpld/nodestore/detail/DecodedBlob.h>
28#include <xrpld/nodestore/detail/EncodedBlob.h>
29#include <xrpl/basics/ByteUtilities.h>
30#include <xrpl/basics/contract.h>
31#include <xrpl/basics/safe_cast.h>
32#include <xrpl/beast/core/CurrentThreadName.h>
33
34#include <atomic>
35#include <memory>
36
37namespace ripple {
38namespace NodeStore {
39
40class RocksDBEnv : public rocksdb::EnvWrapper
41{
42public:
43 RocksDBEnv() : EnvWrapper(rocksdb::Env::Default())
44 {
45 }
46
47 struct ThreadParams
48 {
49 ThreadParams(void (*f_)(void*), void* a_) : f(f_), a(a_)
50 {
51 }
52
53 void (*f)(void*);
54 void* a;
55 };
56
57 static void
58 thread_entry(void* ptr)
59 {
60 ThreadParams* const p(reinterpret_cast<ThreadParams*>(ptr));
61 void (*f)(void*) = p->f;
62 void* a(p->a);
63 delete p;
64
66 std::size_t const id(++n);
68 ss << "rocksdb #" << id;
70
71 (*f)(a);
72 }
73
74 void
75 StartThread(void (*f)(void*), void* a) override
76 {
77 ThreadParams* const p(new ThreadParams(f, a));
78 EnvWrapper::StartThread(&RocksDBEnv::thread_entry, p);
79 }
80};
81
82//------------------------------------------------------------------------------
83
84class RocksDBBackend : public Backend, public BatchWriter::Callback
85{
86private:
87 std::atomic<bool> m_deletePath;
88
89public:
90 beast::Journal m_journal;
91 size_t const m_keyBytes;
92 BatchWriter m_batch;
93 std::string m_name;
95 int fdRequired_ = 2048;
96 rocksdb::Options m_options;
97
98 RocksDBBackend(
99 int keyBytes,
100 Section const& keyValues,
101 Scheduler& scheduler,
102 beast::Journal journal,
103 RocksDBEnv* env)
104 : m_deletePath(false)
105 , m_journal(journal)
106 , m_keyBytes(keyBytes)
107 , m_batch(*this, scheduler)
108 {
109 if (!get_if_exists(keyValues, "path", m_name))
110 Throw<std::runtime_error>("Missing path in RocksDBFactory backend");
111
112 rocksdb::BlockBasedTableOptions table_options;
113 m_options.env = env;
114
115 bool hard_set =
116 keyValues.exists("hard_set") && get<bool>(keyValues, "hard_set");
117
118 if (keyValues.exists("cache_mb"))
119 {
120 auto size = get<int>(keyValues, "cache_mb");
121
122 if (!hard_set && size == 256)
123 size = 1024;
124
125 table_options.block_cache = rocksdb::NewLRUCache(megabytes(size));
126 }
127
128 if (auto const v = get<int>(keyValues, "filter_bits"))
129 {
130 bool const filter_blocks = !keyValues.exists("filter_full") ||
131 (get<int>(keyValues, "filter_full") == 0);
132 table_options.filter_policy.reset(
133 rocksdb::NewBloomFilterPolicy(v, filter_blocks));
134 }
135
136 if (get_if_exists(keyValues, "open_files", m_options.max_open_files))
137 {
138 if (!hard_set && m_options.max_open_files == 2000)
139 m_options.max_open_files = 8000;
140
141 fdRequired_ = m_options.max_open_files + 128;
142 }
143
144 if (keyValues.exists("file_size_mb"))
145 {
146 auto file_size_mb = get<int>(keyValues, "file_size_mb");
147
148 if (!hard_set && file_size_mb == 8)
149 file_size_mb = 256;
150
151 m_options.target_file_size_base = megabytes(file_size_mb);
152 m_options.max_bytes_for_level_base =
153 5 * m_options.target_file_size_base;
154 m_options.write_buffer_size = 2 * m_options.target_file_size_base;
155 }
156
158 keyValues, "file_size_mult", m_options.target_file_size_multiplier);
159
160 if (keyValues.exists("bg_threads"))
161 {
162 m_options.env->SetBackgroundThreads(
163 get<int>(keyValues, "bg_threads"), rocksdb::Env::LOW);
164 }
165
166 if (keyValues.exists("high_threads"))
167 {
168 auto const highThreads = get<int>(keyValues, "high_threads");
169 m_options.env->SetBackgroundThreads(
170 highThreads, rocksdb::Env::HIGH);
171
172 // If we have high-priority threads, presumably we want to
173 // use them for background flushes
174 if (highThreads > 0)
175 m_options.max_background_flushes = highThreads;
176 }
177
178 m_options.compression = rocksdb::kSnappyCompression;
179
180 get_if_exists(keyValues, "block_size", table_options.block_size);
181
182 if (keyValues.exists("universal_compaction") &&
183 (get<int>(keyValues, "universal_compaction") != 0))
184 {
185 m_options.compaction_style = rocksdb::kCompactionStyleUniversal;
186 m_options.min_write_buffer_number_to_merge = 2;
187 m_options.max_write_buffer_number = 6;
188 m_options.write_buffer_size = 6 * m_options.target_file_size_base;
189 }
190
191 if (keyValues.exists("bbt_options"))
192 {
193 auto const s = rocksdb::GetBlockBasedTableOptionsFromString(
194 table_options, get(keyValues, "bbt_options"), &table_options);
195 if (!s.ok())
196 Throw<std::runtime_error>(
197 std::string("Unable to set RocksDB bbt_options: ") +
198 s.ToString());
199 }
200
201 m_options.table_factory.reset(NewBlockBasedTableFactory(table_options));
202
203 if (keyValues.exists("options"))
204 {
205 auto const s = rocksdb::GetOptionsFromString(
206 m_options, get(keyValues, "options"), &m_options);
207 if (!s.ok())
208 Throw<std::runtime_error>(
209 std::string("Unable to set RocksDB options: ") +
210 s.ToString());
211 }
212
213 std::string s1, s2;
214 rocksdb::GetStringFromDBOptions(&s1, m_options, "; ");
215 rocksdb::GetStringFromColumnFamilyOptions(&s2, m_options, "; ");
216 JLOG(m_journal.debug()) << "RocksDB DBOptions: " << s1;
217 JLOG(m_journal.debug()) << "RocksDB CFOptions: " << s2;
218 }
219
220 ~RocksDBBackend() override
221 {
222 close();
223 }
224
225 void
226 open(bool createIfMissing) override
227 {
228 if (m_db)
229 {
230 UNREACHABLE(
231 "ripple::NodeStore::RocksDBBackend::open : database is already "
232 "open");
233 JLOG(m_journal.error()) << "database is already open";
234 return;
235 }
236 rocksdb::DB* db = nullptr;
237 m_options.create_if_missing = createIfMissing;
238 rocksdb::Status status = rocksdb::DB::Open(m_options, m_name, &db);
239 if (!status.ok() || !db)
240 Throw<std::runtime_error>(
241 std::string("Unable to open/create RocksDB: ") +
242 status.ToString());
243 m_db.reset(db);
244 }
245
246 bool
247 isOpen() override
248 {
249 return static_cast<bool>(m_db);
250 }
251
252 void
253 close() override
254 {
255 if (m_db)
256 {
257 m_db.reset();
258 if (m_deletePath)
259 {
260 boost::filesystem::path dir = m_name;
261 boost::filesystem::remove_all(dir);
262 }
263 }
264 }
265
267 getName() override
268 {
269 return m_name;
270 }
271
272 //--------------------------------------------------------------------------
273
274 Status
275 fetch(void const* key, std::shared_ptr<NodeObject>* pObject) override
276 {
277 XRPL_ASSERT(
278 m_db,
279 "ripple::NodeStore::RocksDBBackend::fetch : non-null database");
280 pObject->reset();
281
282 Status status(ok);
283
284 rocksdb::ReadOptions const options;
285 rocksdb::Slice const slice(static_cast<char const*>(key), m_keyBytes);
286
287 std::string string;
288
289 rocksdb::Status getStatus = m_db->Get(options, slice, &string);
290
291 if (getStatus.ok())
292 {
293 DecodedBlob decoded(key, string.data(), string.size());
294
295 if (decoded.wasOk())
296 {
297 *pObject = decoded.createObject();
298 }
299 else
300 {
301 // Decoding failed, probably corrupted!
302 //
304 }
305 }
306 else
307 {
308 if (getStatus.IsCorruption())
309 {
311 }
312 else if (getStatus.IsNotFound())
313 {
315 }
316 else
317 {
318 status =
319 Status(customCode + unsafe_cast<int>(getStatus.code()));
320
321 JLOG(m_journal.error()) << getStatus.ToString();
322 }
323 }
324
325 return status;
326 }
327
329 fetchBatch(std::vector<uint256 const*> const& hashes) override
330 {
332 results.reserve(hashes.size());
333 for (auto const& h : hashes)
334 {
336 Status status = fetch(h->begin(), &nObj);
337 if (status != ok)
338 results.push_back({});
339 else
340 results.push_back(nObj);
341 }
342
343 return {results, ok};
344 }
345
346 void
347 store(std::shared_ptr<NodeObject> const& object) override
348 {
349 m_batch.store(object);
350 }
351
352 void
353 storeBatch(Batch const& batch) override
354 {
355 XRPL_ASSERT(
356 m_db,
357 "ripple::NodeStore::RocksDBBackend::storeBatch : non-null "
358 "database");
359 rocksdb::WriteBatch wb;
360
361 for (auto const& e : batch)
362 {
363 EncodedBlob encoded(e);
364
365 wb.Put(
366 rocksdb::Slice(
367 reinterpret_cast<char const*>(encoded.getKey()),
368 m_keyBytes),
369 rocksdb::Slice(
370 reinterpret_cast<char const*>(encoded.getData()),
371 encoded.getSize()));
372 }
373
374 rocksdb::WriteOptions const options;
375
376 auto ret = m_db->Write(options, &wb);
377
378 if (!ret.ok())
379 Throw<std::runtime_error>("storeBatch failed: " + ret.ToString());
380 }
381
382 void
383 sync() override
384 {
385 }
386
387 void
389 {
390 XRPL_ASSERT(
391 m_db,
392 "ripple::NodeStore::RocksDBBackend::for_each : non-null database");
393 rocksdb::ReadOptions const options;
394
395 std::unique_ptr<rocksdb::Iterator> it(m_db->NewIterator(options));
396
397 for (it->SeekToFirst(); it->Valid(); it->Next())
398 {
399 if (it->key().size() == m_keyBytes)
400 {
401 DecodedBlob decoded(
402 it->key().data(), it->value().data(), it->value().size());
403
404 if (decoded.wasOk())
405 {
406 f(decoded.createObject());
407 }
408 else
409 {
410 // Uh oh, corrupted data!
411 JLOG(m_journal.fatal())
412 << "Corrupt NodeObject #" << it->key().ToString(true);
413 }
414 }
415 else
416 {
417 // VFALCO NOTE What does it mean to find an
418 // incorrectly sized key? Corruption?
419 JLOG(m_journal.fatal())
420 << "Bad key size = " << it->key().size();
421 }
422 }
423 }
424
425 int
426 getWriteLoad() override
427 {
428 return m_batch.getWriteLoad();
429 }
430
431 void
432 setDeletePath() override
433 {
434 m_deletePath = true;
435 }
436
437 //--------------------------------------------------------------------------
438
439 void
440 writeBatch(Batch const& batch) override
441 {
442 storeBatch(batch);
443 }
444
446 int
447 fdRequired() const override
448 {
449 return fdRequired_;
450 }
451};
452
453//------------------------------------------------------------------------------
454
455class RocksDBFactory : public Factory
456{
457public:
458 RocksDBEnv m_env;
459
460 RocksDBFactory()
461 {
462 Manager::instance().insert(*this);
463 }
464
465 ~RocksDBFactory() override
466 {
467 Manager::instance().erase(*this);
468 }
469
471 getName() const override
472 {
473 return "RocksDB";
474 }
475
477 createInstance(
478 size_t keyBytes,
479 Section const& keyValues,
481 Scheduler& scheduler,
482 beast::Journal journal) override
483 {
484 return std::make_unique<RocksDBBackend>(
485 keyBytes, keyValues, scheduler, journal, &m_env);
486 }
487};
488
489static RocksDBFactory rocksDBFactory;
490
491} // namespace NodeStore
492} // namespace ripple
493
494#endif
A generic endpoint for log messages.
Definition: Journal.h:60
Stream fatal() const
Definition: Journal.h:352
Stream error() const
Definition: Journal.h:346
Stream debug() const
Definition: Journal.h:328
T data(T... args)
T for_each(T... args)
void setCurrentThreadName(std::string_view newThreadName)
Changes the name of the caller thread.
Status
Return codes from Backend operations.
@ ok
No action required.
Definition: Disposition.h:29
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: algorithm.h:26
constexpr auto megabytes(T value) noexcept
Definition: ByteUtilities.h:34
bool get_if_exists(Section const &section, std::string const &name, T &v)
Definition: BasicConfig.h:386
void open(soci::session &s, BasicConfig const &config, std::string const &dbName)
Open a soci session.
Definition: SociDB.cpp:99
T get(Section const &section, std::string const &name, T const &defaultValue=T{})
Retrieve a key/value pair from a section.
Definition: BasicConfig.h:355
T push_back(T... args)
T reserve(T... args)
T reset(T... args)
T size(T... args)
T str(T... args)