rippled
RocksDBFactory.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2012, 2013 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #include <ripple/unity/rocksdb.h>
21 
22 #if RIPPLE_ROCKSDB_AVAILABLE
23 
24 #include <ripple/basics/ByteUtilities.h>
25 #include <ripple/basics/contract.h>
26 #include <ripple/beast/core/CurrentThreadName.h>
27 #include <ripple/core/Config.h> // VFALCO Bad dependency
28 #include <ripple/nodestore/Factory.h>
29 #include <ripple/nodestore/Manager.h>
30 #include <ripple/nodestore/impl/BatchWriter.h>
31 #include <ripple/nodestore/impl/DecodedBlob.h>
32 #include <ripple/nodestore/impl/EncodedBlob.h>
33 #include <atomic>
34 #include <memory>
35 
36 namespace ripple {
37 namespace NodeStore {
38 
39 class RocksDBEnv : public rocksdb::EnvWrapper
40 {
41 public:
42  RocksDBEnv() : EnvWrapper(rocksdb::Env::Default())
43  {
44  }
45 
46  struct ThreadParams
47  {
48  ThreadParams(void (*f_)(void*), void* a_) : f(f_), a(a_)
49  {
50  }
51 
52  void (*f)(void*);
53  void* a;
54  };
55 
56  static void
57  thread_entry(void* ptr)
58  {
59  ThreadParams* const p(reinterpret_cast<ThreadParams*>(ptr));
60  void (*f)(void*) = p->f;
61  void* a(p->a);
62  delete p;
63 
64  static std::atomic<std::size_t> n;
65  std::size_t const id(++n);
66  std::stringstream ss;
67  ss << "rocksdb #" << id;
68  beast::setCurrentThreadName(ss.str());
69 
70  (*f)(a);
71  }
72 
73  void
74  StartThread(void (*f)(void*), void* a) override
75  {
76  ThreadParams* const p(new ThreadParams(f, a));
77  EnvWrapper::StartThread(&RocksDBEnv::thread_entry, p);
78  }
79 };
80 
81 //------------------------------------------------------------------------------
82 
83 class RocksDBBackend : public Backend, public BatchWriter::Callback
84 {
85 private:
86  std::atomic<bool> m_deletePath;
87 
88 public:
89  beast::Journal m_journal;
90  size_t const m_keyBytes;
91  BatchWriter m_batch;
92  std::string m_name;
93  std::unique_ptr<rocksdb::DB> m_db;
94  int fdRequired_ = 2048;
95  rocksdb::Options m_options;
96 
97  RocksDBBackend(
98  int keyBytes,
99  Section const& keyValues,
100  Scheduler& scheduler,
101  beast::Journal journal,
102  RocksDBEnv* env)
103  : m_deletePath(false)
104  , m_journal(journal)
105  , m_keyBytes(keyBytes)
106  , m_batch(*this, scheduler)
107  {
108  if (!get_if_exists(keyValues, "path", m_name))
109  Throw<std::runtime_error>("Missing path in RocksDBFactory backend");
110 
111  rocksdb::BlockBasedTableOptions table_options;
112  m_options.env = env;
113 
114  if (keyValues.exists("cache_mb"))
115  table_options.block_cache = rocksdb::NewLRUCache(
116  get<int>(keyValues, "cache_mb") * megabytes(1));
117 
118  if (auto const v = get<int>(keyValues, "filter_bits"))
119  {
120  bool const filter_blocks = !keyValues.exists("filter_full") ||
121  (get<int>(keyValues, "filter_full") == 0);
122  table_options.filter_policy.reset(
123  rocksdb::NewBloomFilterPolicy(v, filter_blocks));
124  }
125 
126  if (get_if_exists(keyValues, "open_files", m_options.max_open_files))
127  fdRequired_ = m_options.max_open_files;
128 
129  if (keyValues.exists("file_size_mb"))
130  {
131  m_options.target_file_size_base =
132  megabytes(1) * get<int>(keyValues, "file_size_mb");
133  m_options.max_bytes_for_level_base =
134  5 * m_options.target_file_size_base;
135  m_options.write_buffer_size = 2 * m_options.target_file_size_base;
136  }
137 
138  get_if_exists(
139  keyValues, "file_size_mult", m_options.target_file_size_multiplier);
140 
141  if (keyValues.exists("bg_threads"))
142  {
143  m_options.env->SetBackgroundThreads(
144  get<int>(keyValues, "bg_threads"), rocksdb::Env::LOW);
145  }
146 
147  if (keyValues.exists("high_threads"))
148  {
149  auto const highThreads = get<int>(keyValues, "high_threads");
150  m_options.env->SetBackgroundThreads(
151  highThreads, rocksdb::Env::HIGH);
152 
153  // If we have high-priority threads, presumably we want to
154  // use them for background flushes
155  if (highThreads > 0)
156  m_options.max_background_flushes = highThreads;
157  }
158 
159  m_options.compression = rocksdb::kSnappyCompression;
160 
161  get_if_exists(keyValues, "block_size", table_options.block_size);
162 
163  if (keyValues.exists("universal_compaction") &&
164  (get<int>(keyValues, "universal_compaction") != 0))
165  {
166  m_options.compaction_style = rocksdb::kCompactionStyleUniversal;
167  m_options.min_write_buffer_number_to_merge = 2;
168  m_options.max_write_buffer_number = 6;
169  m_options.write_buffer_size = 6 * m_options.target_file_size_base;
170  }
171 
172  if (keyValues.exists("bbt_options"))
173  {
174  auto const s = rocksdb::GetBlockBasedTableOptionsFromString(
175  table_options,
176  get<std::string>(keyValues, "bbt_options"),
177  &table_options);
178  if (!s.ok())
179  Throw<std::runtime_error>(
180  std::string("Unable to set RocksDB bbt_options: ") +
181  s.ToString());
182  }
183 
184  m_options.table_factory.reset(NewBlockBasedTableFactory(table_options));
185 
186  if (keyValues.exists("options"))
187  {
188  auto const s = rocksdb::GetOptionsFromString(
189  m_options, get<std::string>(keyValues, "options"), &m_options);
190  if (!s.ok())
191  Throw<std::runtime_error>(
192  std::string("Unable to set RocksDB options: ") +
193  s.ToString());
194  }
195 
196  std::string s1, s2;
197  rocksdb::GetStringFromDBOptions(&s1, m_options, "; ");
198  rocksdb::GetStringFromColumnFamilyOptions(&s2, m_options, "; ");
199  JLOG(m_journal.debug()) << "RocksDB DBOptions: " << s1;
200  JLOG(m_journal.debug()) << "RocksDB CFOptions: " << s2;
201  }
202 
203  ~RocksDBBackend() override
204  {
205  close();
206  }
207 
208  void
209  open(bool createIfMissing) override
210  {
211  if (m_db)
212  {
213  assert(false);
214  JLOG(m_journal.error()) << "database is already open";
215  return;
216  }
217  rocksdb::DB* db = nullptr;
218  m_options.create_if_missing = createIfMissing;
219  rocksdb::Status status = rocksdb::DB::Open(m_options, m_name, &db);
220  if (!status.ok() || !db)
221  Throw<std::runtime_error>(
222  std::string("Unable to open/create RocksDB: ") +
223  status.ToString());
224  m_db.reset(db);
225  }
226 
227  bool
228  isOpen() override
229  {
230  return static_cast<bool>(m_db);
231  }
232 
233  void
234  close() override
235  {
236  if (m_db)
237  {
238  m_db.reset();
239  if (m_deletePath)
240  {
241  boost::filesystem::path dir = m_name;
242  boost::filesystem::remove_all(dir);
243  }
244  }
245  }
246 
247  std::string
248  getName() override
249  {
250  return m_name;
251  }
252 
253  //--------------------------------------------------------------------------
254 
255  Status
256  fetch(void const* key, std::shared_ptr<NodeObject>* pObject) override
257  {
258  assert(m_db);
259  pObject->reset();
260 
261  Status status(ok);
262 
263  rocksdb::ReadOptions const options;
264  rocksdb::Slice const slice(static_cast<char const*>(key), m_keyBytes);
265 
266  std::string string;
267 
268  rocksdb::Status getStatus = m_db->Get(options, slice, &string);
269 
270  if (getStatus.ok())
271  {
272  DecodedBlob decoded(key, string.data(), string.size());
273 
274  if (decoded.wasOk())
275  {
276  *pObject = decoded.createObject();
277  }
278  else
279  {
280  // Decoding failed, probably corrupted!
281  //
282  status = dataCorrupt;
283  }
284  }
285  else
286  {
287  if (getStatus.IsCorruption())
288  {
289  status = dataCorrupt;
290  }
291  else if (getStatus.IsNotFound())
292  {
293  status = notFound;
294  }
295  else
296  {
297  status = Status(customCode + getStatus.code());
298 
299  JLOG(m_journal.error()) << getStatus.ToString();
300  }
301  }
302 
303  return status;
304  }
305 
306  std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
307  fetchBatch(std::vector<uint256 const*> const& hashes) override
308  {
309  std::vector<std::shared_ptr<NodeObject>> results;
310  results.reserve(hashes.size());
311  for (auto const& h : hashes)
312  {
313  std::shared_ptr<NodeObject> nObj;
314  Status status = fetch(h->begin(), &nObj);
315  if (status != ok)
316  results.push_back({});
317  else
318  results.push_back(nObj);
319  }
320 
321  return {results, ok};
322  }
323 
324  void
325  store(std::shared_ptr<NodeObject> const& object) override
326  {
327  m_batch.store(object);
328  }
329 
330  void
331  storeBatch(Batch const& batch) override
332  {
333  assert(m_db);
334  rocksdb::WriteBatch wb;
335 
336  EncodedBlob encoded;
337 
338  for (auto const& e : batch)
339  {
340  encoded.prepare(e);
341 
342  wb.Put(
343  rocksdb::Slice(
344  reinterpret_cast<char const*>(encoded.getKey()),
345  m_keyBytes),
346  rocksdb::Slice(
347  reinterpret_cast<char const*>(encoded.getData()),
348  encoded.getSize()));
349  }
350 
351  rocksdb::WriteOptions const options;
352 
353  auto ret = m_db->Write(options, &wb);
354 
355  if (!ret.ok())
356  Throw<std::runtime_error>("storeBatch failed: " + ret.ToString());
357  }
358 
359  void
360  sync() override
361  {
362  }
363 
364  void
365  for_each(std::function<void(std::shared_ptr<NodeObject>)> f) override
366  {
367  assert(m_db);
368  rocksdb::ReadOptions const options;
369 
370  std::unique_ptr<rocksdb::Iterator> it(m_db->NewIterator(options));
371 
372  for (it->SeekToFirst(); it->Valid(); it->Next())
373  {
374  if (it->key().size() == m_keyBytes)
375  {
376  DecodedBlob decoded(
377  it->key().data(), it->value().data(), it->value().size());
378 
379  if (decoded.wasOk())
380  {
381  f(decoded.createObject());
382  }
383  else
384  {
385  // Uh oh, corrupted data!
386  JLOG(m_journal.fatal())
387  << "Corrupt NodeObject #" << it->key().ToString(true);
388  }
389  }
390  else
391  {
392  // VFALCO NOTE What does it mean to find an
393  // incorrectly sized key? Corruption?
394  JLOG(m_journal.fatal())
395  << "Bad key size = " << it->key().size();
396  }
397  }
398  }
399 
400  int
401  getWriteLoad() override
402  {
403  return m_batch.getWriteLoad();
404  }
405 
406  void
407  setDeletePath() override
408  {
409  m_deletePath = true;
410  }
411 
412  //--------------------------------------------------------------------------
413 
414  void
415  writeBatch(Batch const& batch) override
416  {
417  storeBatch(batch);
418  }
419 
420  /** Returns the number of file descriptors the backend expects to need. */
421  int
422  fdRequired() const override
423  {
424  return fdRequired_;
425  }
426 };
427 
428 //------------------------------------------------------------------------------
429 
430 class RocksDBFactory : public Factory
431 {
432 public:
433  RocksDBEnv m_env;
434 
435  RocksDBFactory()
436  {
437  Manager::instance().insert(*this);
438  }
439 
440  ~RocksDBFactory() override
441  {
442  Manager::instance().erase(*this);
443  }
444 
445  std::string
446  getName() const override
447  {
448  return "RocksDB";
449  }
450 
451  std::unique_ptr<Backend>
452  createInstance(
453  size_t keyBytes,
454  Section const& keyValues,
455  std::size_t,
456  Scheduler& scheduler,
457  beast::Journal journal) override
458  {
459  return std::make_unique<RocksDBBackend>(
460  keyBytes, keyValues, scheduler, journal, &m_env);
461  }
462 };
463 
464 static RocksDBFactory rocksDBFactory;
465 
466 } // namespace NodeStore
467 } // namespace ripple
468 
469 #endif
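
The backend above maps configuration keys such as "path", "cache_mb", "filter_bits", "open_files" and "file_size_mb" onto a rocksdb::Options instance, then drives the database through DB::Open, WriteBatch + DB::Write, DB::Get and NewIterator. The following standalone sketch is not part of rippled; the path and the cache/filter/file-handle numbers are illustrative assumptions. It exercises the same public RocksDB calls directly, which can be handy when reproducing or tuning the options this factory builds.

// Standalone sketch, not part of rippled: mirrors the RocksDB calls used by
// RocksDBBackend (Options/BlockBasedTableOptions, DB::Open, WriteBatch +
// DB::Write, DB::Get). Path and sizes are illustrative assumptions.
#include <rocksdb/cache.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>
#include <iostream>
#include <memory>
#include <string>

int main()
{
    rocksdb::BlockBasedTableOptions table_options;
    // Roughly cache_mb=256: a 256 MiB LRU block cache.
    table_options.block_cache = rocksdb::NewLRUCache(256 * 1024 * 1024);
    // Roughly filter_bits=10; the two-argument overload matches the RocksDB
    // bundled with rippled (newer RocksDB releases drop the second parameter).
    table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));

    rocksdb::Options options;
    options.table_factory.reset(
        rocksdb::NewBlockBasedTableFactory(table_options));
    options.max_open_files = 2000;                      // open_files=2000
    options.compression = rocksdb::kSnappyCompression;  // as in the backend
    options.create_if_missing = true;                   // like open(true)

    rocksdb::DB* raw = nullptr;
    rocksdb::Status status =
        rocksdb::DB::Open(options, "/tmp/rocksdb-example", &raw);
    if (!status.ok())
    {
        std::cerr << "open failed: " << status.ToString() << '\n';
        return 1;
    }
    std::unique_ptr<rocksdb::DB> db(raw);

    // Batched write, as storeBatch() does.
    rocksdb::WriteBatch wb;
    wb.Put("key", "value");
    status = db->Write(rocksdb::WriteOptions(), &wb);
    if (!status.ok())
        std::cerr << "write failed: " << status.ToString() << '\n';

    // Point lookup, as fetch() does.
    std::string value;
    status = db->Get(rocksdb::ReadOptions(), "key", &value);
    std::cout << (status.ok() ? value : status.ToString()) << '\n';
    return 0;
}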