rippled
CassandraFactory.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2020 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #ifdef RIPPLED_REPORTING
21 
22 #include <cassandra.h>
23 #include <libpq-fe.h>
24 
25 #include <ripple/basics/Slice.h>
26 #include <ripple/basics/StringUtilities.h>
27 #include <ripple/basics/contract.h>
28 #include <ripple/basics/strHex.h>
29 #include <ripple/nodestore/Backend.h>
30 #include <ripple/nodestore/Factory.h>
31 #include <ripple/nodestore/Manager.h>
32 #include <ripple/nodestore/impl/DecodedBlob.h>
33 #include <ripple/nodestore/impl/EncodedBlob.h>
34 #include <ripple/nodestore/impl/codec.h>
35 #include <ripple/protocol/digest.h>
36 #include <boost/asio/steady_timer.hpp>
37 #include <boost/filesystem.hpp>
38 #include <atomic>
39 #include <cassert>
40 #include <chrono>
41 #include <cmath>
42 #include <cstdint>
43 #include <cstdio>
44 #include <cstring>
45 #include <exception>
46 #include <fstream>
47 #include <memory>
48 #include <mutex>
49 #include <nudb/nudb.hpp>
50 #include <queue>
51 #include <sstream>
52 #include <thread>
53 #include <utility>
54 #include <vector>
55 
56 namespace ripple {
57 namespace NodeStore {
58 
59 void
60 writeCallback(CassFuture* fut, void* cbData);
61 void
62 readCallback(CassFuture* fut, void* cbData);
63 
64 class CassandraBackend : public Backend
65 {
66 private:
67  // convenience function for one-off queries. For normal reads and writes,
68  // use the prepared statements insert_ and select_
69  CassStatement*
70  makeStatement(char const* query, std::size_t params)
71  {
72  CassStatement* ret = cass_statement_new(query, params);
73  CassError rc =
74  cass_statement_set_consistency(ret, CASS_CONSISTENCY_QUORUM);
75  if (rc != CASS_OK)
76  {
78  ss << "nodestore: Error setting query consistency: " << query
79  << ", result: " << rc << ", " << cass_error_desc(rc);
80  Throw<std::runtime_error>(ss.str());
81  }
82  return ret;
83  }
84 
85  beast::Journal const j_;
86  // size of a key
87  size_t const keyBytes_;
88 
89  Section const config_;
90 
91  std::atomic<bool> open_{false};
92 
93  // mutex used for open() and close()
94  std::mutex mutex_;
95 
96  std::unique_ptr<CassSession, void (*)(CassSession*)> session_{
97  nullptr,
98  [](CassSession* session) {
99  // Try to disconnect gracefully.
100  CassFuture* fut = cass_session_close(session);
101  cass_future_wait(fut);
102  cass_future_free(fut);
103  cass_session_free(session);
104  }};
105 
106  // Database statements cached server side. Using these is more efficient
107  // than making a new statement
108  const CassPrepared* insert_ = nullptr;
109  const CassPrepared* select_ = nullptr;
110 
111  // io_context used for exponential backoff for write retries
112  boost::asio::io_context ioContext_;
114  std::thread ioThread_;
115 
116  // maximum number of concurrent in flight requests. New requests will wait
117  // for earlier requests to finish if this limit is exceeded
118  uint32_t maxRequestsOutstanding = 10000000;
119  std::atomic_uint32_t numRequestsOutstanding_ = 0;
120 
121  // mutex and condition_variable to limit the number of concurrent in flight
122  // requests
123  std::mutex throttleMutex_;
124  std::condition_variable throttleCv_;
125 
126  // writes are asynchronous. This mutex and condition_variable is used to
127  // wait for all writes to finish
128  std::mutex syncMutex_;
129  std::condition_variable syncCv_;
130 
131  Counters<std::atomic<std::uint64_t>> counters_;
132 
133 public:
134  CassandraBackend(
135  size_t keyBytes,
136  Section const& keyValues,
137  beast::Journal journal)
138  : j_(journal), keyBytes_(keyBytes), config_(keyValues)
139  {
140  }
141 
142  ~CassandraBackend() override
143  {
144  close();
145  }
146 
148  getName() override
149  {
150  return "cassandra";
151  }
152 
153  bool
154  isOpen() override
155  {
156  return open_;
157  }
158 
159  // Setup all of the necessary components for talking to the database.
160  // Create the table if it doesn't exist already
161  // @param createIfMissing ignored
162  void
163  open(bool createIfMissing) override
164  {
165  if (open_)
166  {
167  assert(false);
168  JLOG(j_.error()) << "database is already open";
169  return;
170  }
171 
173  CassCluster* cluster = cass_cluster_new();
174  if (!cluster)
175  Throw<std::runtime_error>(
176  "nodestore:: Failed to create CassCluster");
177 
178  std::string secureConnectBundle = get(config_, "secure_connect_bundle");
179 
180  if (!secureConnectBundle.empty())
181  {
182  /* Setup driver to connect to the cloud using the secure connection
183  * bundle */
184  if (cass_cluster_set_cloud_secure_connection_bundle(
185  cluster, secureConnectBundle.c_str()) != CASS_OK)
186  {
187  JLOG(j_.error()) << "Unable to configure cloud using the "
188  "secure connection bundle: "
189  << secureConnectBundle;
190  Throw<std::runtime_error>(
191  "nodestore: Failed to connect using secure connection "
192  "bundle");
193  return;
194  }
195  }
196  else
197  {
198  std::string contact_points = get(config_, "contact_points");
199  if (contact_points.empty())
200  {
201  Throw<std::runtime_error>(
202  "nodestore: Missing contact_points in Cassandra config");
203  }
204  CassError rc = cass_cluster_set_contact_points(
205  cluster, contact_points.c_str());
206  if (rc != CASS_OK)
207  {
209  ss << "nodestore: Error setting Cassandra contact_points: "
210  << contact_points << ", result: " << rc << ", "
211  << cass_error_desc(rc);
212 
213  Throw<std::runtime_error>(ss.str());
214  }
215 
216  int port = get<int>(config_, "port");
217  if (port)
218  {
219  rc = cass_cluster_set_port(cluster, port);
220  if (rc != CASS_OK)
221  {
223  ss << "nodestore: Error setting Cassandra port: " << port
224  << ", result: " << rc << ", " << cass_error_desc(rc);
225 
226  Throw<std::runtime_error>(ss.str());
227  }
228  }
229  }
230  cass_cluster_set_token_aware_routing(cluster, cass_true);
231  CassError rc = cass_cluster_set_protocol_version(
232  cluster, CASS_PROTOCOL_VERSION_V4);
233  if (rc != CASS_OK)
234  {
236  ss << "nodestore: Error setting cassandra protocol version: "
237  << ", result: " << rc << ", " << cass_error_desc(rc);
238 
239  Throw<std::runtime_error>(ss.str());
240  }
241 
242  std::string username = get(config_, "username");
243  if (username.size())
244  {
245  std::cout << "user = " << username
246  << " password = " << get(config_, "password")
247  << std::endl;
248  cass_cluster_set_credentials(
249  cluster, username.c_str(), get(config_, "password").c_str());
250  }
251 
252  unsigned int const workers = std::thread::hardware_concurrency();
253  rc = cass_cluster_set_num_threads_io(cluster, workers);
254  if (rc != CASS_OK)
255  {
257  ss << "nodestore: Error setting Cassandra io threads to " << workers
258  << ", result: " << rc << ", " << cass_error_desc(rc);
259  Throw<std::runtime_error>(ss.str());
260  }
261 
262  cass_cluster_set_request_timeout(cluster, 2000);
263 
264  rc = cass_cluster_set_queue_size_io(
265  cluster,
266  maxRequestsOutstanding); // This number needs to scale w/ the
267  // number of request per sec
268  if (rc != CASS_OK)
269  {
271  ss << "nodestore: Error setting Cassandra max core connections per "
272  "host"
273  << ", result: " << rc << ", " << cass_error_desc(rc);
274  std::cout << ss.str() << std::endl;
275  return;
276  ;
277  }
278 
279  std::string certfile = get(config_, "certfile");
280  if (certfile.size())
281  {
282  std::ifstream fileStream(
283  boost::filesystem::path(certfile).string(), std::ios::in);
284  if (!fileStream)
285  {
287  ss << "opening config file " << certfile;
288  Throw<std::system_error>(
289  errno, std::generic_category(), ss.str());
290  }
291  std::string cert(
292  std::istreambuf_iterator<char>{fileStream},
294  if (fileStream.bad())
295  {
297  ss << "reading config file " << certfile;
298  Throw<std::system_error>(
299  errno, std::generic_category(), ss.str());
300  }
301 
302  CassSsl* context = cass_ssl_new();
303  cass_ssl_set_verify_flags(context, CASS_SSL_VERIFY_NONE);
304  rc = cass_ssl_add_trusted_cert(context, cert.c_str());
305  if (rc != CASS_OK)
306  {
308  ss << "nodestore: Error setting Cassandra ssl context: " << rc
309  << ", " << cass_error_desc(rc);
310  Throw<std::runtime_error>(ss.str());
311  }
312 
313  cass_cluster_set_ssl(cluster, context);
314  cass_ssl_free(context);
315  }
316 
317  std::string keyspace = get(config_, "keyspace");
318  if (keyspace.empty())
319  {
320  Throw<std::runtime_error>(
321  "nodestore: Missing keyspace in Cassandra config");
322  }
323 
324  std::string tableName = get(config_, "table_name");
325  if (tableName.empty())
326  {
327  Throw<std::runtime_error>(
328  "nodestore: Missing table name in Cassandra config");
329  }
330 
331  cass_cluster_set_connect_timeout(cluster, 10000);
332 
333  CassStatement* statement;
334  CassFuture* fut;
335  bool setupSessionAndTable = false;
336  while (!setupSessionAndTable)
337  {
339  session_.reset(cass_session_new());
340  assert(session_);
341 
342  fut = cass_session_connect_keyspace(
343  session_.get(), cluster, keyspace.c_str());
344  rc = cass_future_error_code(fut);
345  cass_future_free(fut);
346  if (rc != CASS_OK)
347  {
349  ss << "nodestore: Error connecting Cassandra session keyspace: "
350  << rc << ", " << cass_error_desc(rc);
351  JLOG(j_.error()) << ss.str();
352  continue;
353  }
354 
355  std::stringstream query;
356  query << "CREATE TABLE IF NOT EXISTS " << tableName
357  << " ( hash blob PRIMARY KEY, object blob)";
358 
359  statement = makeStatement(query.str().c_str(), 0);
360  fut = cass_session_execute(session_.get(), statement);
361  rc = cass_future_error_code(fut);
362  cass_future_free(fut);
363  cass_statement_free(statement);
364  if (rc != CASS_OK && rc != CASS_ERROR_SERVER_INVALID_QUERY)
365  {
367  ss << "nodestore: Error creating Cassandra table: " << rc
368  << ", " << cass_error_desc(rc);
369  JLOG(j_.error()) << ss.str();
370  continue;
371  }
372 
373  query = {};
374  query << "SELECT * FROM " << tableName << " LIMIT 1";
375  statement = makeStatement(query.str().c_str(), 0);
376  fut = cass_session_execute(session_.get(), statement);
377  rc = cass_future_error_code(fut);
378  cass_future_free(fut);
379  cass_statement_free(statement);
380  if (rc != CASS_OK)
381  {
382  if (rc == CASS_ERROR_SERVER_INVALID_QUERY)
383  {
384  JLOG(j_.warn()) << "table not here yet, sleeping 1s to "
385  "see if table creation propagates";
386  continue;
387  }
388  else
389  {
391  ss << "nodestore: Error checking for table: " << rc << ", "
392  << cass_error_desc(rc);
393  JLOG(j_.error()) << ss.str();
394  continue;
395  }
396  }
397 
398  setupSessionAndTable = true;
399  }
400 
401  cass_cluster_free(cluster);
402 
403  bool setupPreparedStatements = false;
404  while (!setupPreparedStatements)
405  {
407  std::stringstream query;
408  query << "INSERT INTO " << tableName
409  << " (hash, object) VALUES (?, ?)";
410  CassFuture* prepare_future =
411  cass_session_prepare(session_.get(), query.str().c_str());
412 
413  /* Wait for the statement to prepare and get the result */
414  rc = cass_future_error_code(prepare_future);
415 
416  if (rc != CASS_OK)
417  {
418  /* Handle error */
419  cass_future_free(prepare_future);
420 
422  ss << "nodestore: Error preparing insert : " << rc << ", "
423  << cass_error_desc(rc);
424  JLOG(j_.error()) << ss.str();
425  continue;
426  }
427 
428  /* Get the prepared object from the future */
429  insert_ = cass_future_get_prepared(prepare_future);
430 
431  /* The future can be freed immediately after getting the prepared
432  * object
433  */
434  cass_future_free(prepare_future);
435 
436  query = {};
437  query << "SELECT object FROM " << tableName << " WHERE hash = ?";
438  prepare_future =
439  cass_session_prepare(session_.get(), query.str().c_str());
440 
441  /* Wait for the statement to prepare and get the result */
442  rc = cass_future_error_code(prepare_future);
443 
444  if (rc != CASS_OK)
445  {
446  /* Handle error */
447  cass_future_free(prepare_future);
448 
450  ss << "nodestore: Error preparing select : " << rc << ", "
451  << cass_error_desc(rc);
452  JLOG(j_.error()) << ss.str();
453  continue;
454  }
455 
456  /* Get the prepared object from the future */
457  select_ = cass_future_get_prepared(prepare_future);
458 
459  /* The future can be freed immediately after getting the prepared
460  * object
461  */
462  cass_future_free(prepare_future);
463  setupPreparedStatements = true;
464  }
465 
466  work_.emplace(ioContext_);
467  ioThread_ = std::thread{[this]() { ioContext_.run(); }};
468  open_ = true;
469 
470  if (config_.exists("max_requests_outstanding"))
471  {
472  maxRequestsOutstanding =
473  get<int>(config_, "max_requests_outstanding");
474  }
475  }
476 
477  // Close the connection to the database
478  void
479  close() override
480  {
481  {
483  if (insert_)
484  {
485  cass_prepared_free(insert_);
486  insert_ = nullptr;
487  }
488  if (select_)
489  {
490  cass_prepared_free(select_);
491  select_ = nullptr;
492  }
493  work_.reset();
494  ioThread_.join();
495  }
496  open_ = false;
497  }
498 
499  // Synchronously fetch the object with key key and store the result in pno
500  // @param key the key of the object
501  // @param pno object in which to store the result
502  // @return result status of query
503  Status
504  fetch(void const* key, std::shared_ptr<NodeObject>* pno) override
505  {
506  JLOG(j_.trace()) << "Fetching from cassandra";
507  CassStatement* statement = cass_prepared_bind(select_);
508  cass_statement_set_consistency(statement, CASS_CONSISTENCY_QUORUM);
509  CassError rc = cass_statement_bind_bytes(
510  statement, 0, static_cast<cass_byte_t const*>(key), keyBytes_);
511  if (rc != CASS_OK)
512  {
513  cass_statement_free(statement);
514  JLOG(j_.error()) << "Binding Cassandra fetch query: " << rc << ", "
515  << cass_error_desc(rc);
516  pno->reset();
517  return backendError;
518  }
519  CassFuture* fut;
520  do
521  {
522  fut = cass_session_execute(session_.get(), statement);
523  rc = cass_future_error_code(fut);
524  if (rc != CASS_OK)
525  {
527  ss << "Cassandra fetch error";
528  ss << ", retrying";
529  ++counters_.readRetries;
530  ss << ": " << cass_error_desc(rc);
531  JLOG(j_.warn()) << ss.str();
532  }
533  } while (rc != CASS_OK);
534 
535  CassResult const* res = cass_future_get_result(fut);
536  cass_statement_free(statement);
537  cass_future_free(fut);
538 
539  CassRow const* row = cass_result_first_row(res);
540  if (!row)
541  {
542  cass_result_free(res);
543  pno->reset();
544  return notFound;
545  }
546  cass_byte_t const* buf;
547  std::size_t bufSize;
548  rc = cass_value_get_bytes(cass_row_get_column(row, 0), &buf, &bufSize);
549  if (rc != CASS_OK)
550  {
551  cass_result_free(res);
552  pno->reset();
553  JLOG(j_.error()) << "Cassandra fetch result error: " << rc << ", "
554  << cass_error_desc(rc);
555  ++counters_.readErrors;
556  return backendError;
557  }
558 
559  nudb::detail::buffer bf;
561  nodeobject_decompress(buf, bufSize, bf);
562  DecodedBlob decoded(key, uncompressed.first, uncompressed.second);
563  cass_result_free(res);
564 
565  if (!decoded.wasOk())
566  {
567  pno->reset();
568  JLOG(j_.error()) << "Cassandra error decoding result: " << rc
569  << ", " << cass_error_desc(rc);
570  ++counters_.readErrors;
571  return dataCorrupt;
572  }
573  *pno = decoded.createObject();
574  return ok;
575  }
576 
577  struct ReadCallbackData
578  {
579  CassandraBackend& backend;
580  const void* const key;
583 
584  std::atomic_uint32_t& numFinished;
585  size_t batchSize;
586 
587  ReadCallbackData(
588  CassandraBackend& backend,
589  const void* const key,
592  std::atomic_uint32_t& numFinished,
593  size_t batchSize)
594  : backend(backend)
595  , key(key)
596  , result(result)
597  , cv(cv)
598  , numFinished(numFinished)
599  , batchSize(batchSize)
600  {
601  }
602 
603  ReadCallbackData(ReadCallbackData const& other) = default;
604  };
605 
607  fetchBatch(std::vector<uint256 const*> const& hashes) override
608  {
609  std::size_t const numHashes = hashes.size();
610  JLOG(j_.trace()) << "Fetching " << numHashes
611  << " records from Cassandra";
612  std::atomic_uint32_t numFinished = 0;
614  std::mutex mtx;
615  std::vector<std::shared_ptr<NodeObject>> results{numHashes};
617  cbs.reserve(numHashes);
618  for (std::size_t i = 0; i < hashes.size(); ++i)
619  {
620  cbs.push_back(std::make_shared<ReadCallbackData>(
621  *this,
622  static_cast<void const*>(hashes[i]),
623  results[i],
624  cv,
625  numFinished,
626  numHashes));
627  read(*cbs[i]);
628  }
629  assert(results.size() == cbs.size());
630 
632  cv.wait(lck, [&numFinished, &numHashes]() {
633  return numFinished == numHashes;
634  });
635 
636  JLOG(j_.trace()) << "Fetched " << numHashes
637  << " records from Cassandra";
638  return {results, ok};
639  }
640 
641  void
642  read(ReadCallbackData& data)
643  {
644  CassStatement* statement = cass_prepared_bind(select_);
645  cass_statement_set_consistency(statement, CASS_CONSISTENCY_QUORUM);
646  CassError rc = cass_statement_bind_bytes(
647  statement, 0, static_cast<cass_byte_t const*>(data.key), keyBytes_);
648  if (rc != CASS_OK)
649  {
650  size_t batchSize = data.batchSize;
651  if (++(data.numFinished) == batchSize)
652  data.cv.notify_all();
653  cass_statement_free(statement);
654  JLOG(j_.error()) << "Binding Cassandra fetch query: " << rc << ", "
655  << cass_error_desc(rc);
656  return;
657  }
658 
659  CassFuture* fut = cass_session_execute(session_.get(), statement);
660 
661  cass_statement_free(statement);
662 
663  cass_future_set_callback(fut, readCallback, static_cast<void*>(&data));
664  cass_future_free(fut);
665  }
666 
667  struct WriteCallbackData
668  {
669  CassandraBackend* backend;
670  // The shared pointer to the node object must exist until it's
671  // confirmed persisted. Otherwise, it can become deleted
672  // prematurely if other copies are removed from caches.
674  NodeStore::EncodedBlob e;
676  std::chrono::steady_clock::time_point begin;
677  // The data is stored in this buffer. The void* in the above member
678  // is a pointer into the below buffer
679  nudb::detail::buffer bf;
680  std::atomic<std::uint64_t>& totalWriteRetries;
681 
682  uint32_t currentRetries = 0;
683 
684  WriteCallbackData(
685  CassandraBackend* f,
686  std::shared_ptr<NodeObject> const& nobj,
688  : backend(f), no(nobj), totalWriteRetries(retries)
689  {
690  e.prepare(no);
691 
692  compressed =
693  NodeStore::nodeobject_compress(e.getData(), e.getSize(), bf);
694  }
695  };
696 
697  void
698  write(WriteCallbackData& data, bool isRetry)
699  {
700  {
701  // We limit the total number of concurrent inflight writes. This is
702  // a client side throttling to prevent overloading the database.
703  // This is mostly useful when the very first ledger is being written
704  // in full, which is several millions records. On sufficiently large
705  // Cassandra clusters, this throttling is not needed; the default
706  // value of maxRequestsOutstanding is 10 million, which is more
707  // records than are present in any single ledger
708  std::unique_lock<std::mutex> lck(throttleMutex_);
709  if (!isRetry && numRequestsOutstanding_ > maxRequestsOutstanding)
710  {
711  JLOG(j_.trace()) << __func__ << " : "
712  << "Max outstanding requests reached. "
713  << "Waiting for other requests to finish";
714  ++counters_.writesDelayed;
715  throttleCv_.wait(lck, [this]() {
716  return numRequestsOutstanding_ < maxRequestsOutstanding;
717  });
718  }
719  }
720 
721  CassStatement* statement = cass_prepared_bind(insert_);
722  cass_statement_set_consistency(statement, CASS_CONSISTENCY_QUORUM);
723  CassError rc = cass_statement_bind_bytes(
724  statement,
725  0,
726  static_cast<cass_byte_t const*>(data.e.getKey()),
727  keyBytes_);
728  if (rc != CASS_OK)
729  {
730  cass_statement_free(statement);
732  ss << "Binding cassandra insert hash: " << rc << ", "
733  << cass_error_desc(rc);
734  JLOG(j_.error()) << __func__ << " : " << ss.str();
735  Throw<std::runtime_error>(ss.str());
736  }
737  rc = cass_statement_bind_bytes(
738  statement,
739  1,
740  static_cast<cass_byte_t const*>(data.compressed.first),
741  data.compressed.second);
742  if (rc != CASS_OK)
743  {
744  cass_statement_free(statement);
746  ss << "Binding cassandra insert object: " << rc << ", "
747  << cass_error_desc(rc);
748  JLOG(j_.error()) << __func__ << " : " << ss.str();
749  Throw<std::runtime_error>(ss.str());
750  }
752  CassFuture* fut = cass_session_execute(session_.get(), statement);
753  cass_statement_free(statement);
754 
755  cass_future_set_callback(fut, writeCallback, static_cast<void*>(&data));
756  cass_future_free(fut);
757  }
758 
759  void
760  store(std::shared_ptr<NodeObject> const& no) override
761  {
762  JLOG(j_.trace()) << "Writing to cassandra";
763  WriteCallbackData* data =
764  new WriteCallbackData(this, no, counters_.writeRetries);
765 
766  ++numRequestsOutstanding_;
767  write(*data, false);
768  }
769 
770  void
771  storeBatch(Batch const& batch) override
772  {
773  for (auto const& no : batch)
774  {
775  store(no);
776  }
777  }
778 
779  void
780  sync() override
781  {
782  std::unique_lock<std::mutex> lck(syncMutex_);
783 
784  syncCv_.wait(lck, [this]() { return numRequestsOutstanding_ == 0; });
785  }
786 
787  // Iterate through entire table and execute f(). Used for import only,
788  // with database not being written to, so safe to paginate through
789  // objects table with LIMIT x OFFSET y.
790  void
792  {
793  assert(false);
794  Throw<std::runtime_error>("not implemented");
795  }
796 
797  int
798  getWriteLoad() override
799  {
800  return 0;
801  }
802 
803  void
804  setDeletePath() override
805  {
806  }
807 
808  int
809  fdRequired() const override
810  {
811  return 0;
812  }
813 
815  counters() const override
816  {
817  return counters_;
818  }
819 
820  friend void
821  writeCallback(CassFuture* fut, void* cbData);
822 
823  friend void
824  readCallback(CassFuture* fut, void* cbData);
825 };
826 
827 // Process the result of an asynchronous read. Retry on error
828 // @param fut cassandra future associated with the read
829 // @param cbData struct that holds the request parameters
830 void
831 readCallback(CassFuture* fut, void* cbData)
832 {
833  CassandraBackend::ReadCallbackData& requestParams =
834  *static_cast<CassandraBackend::ReadCallbackData*>(cbData);
835 
836  CassError rc = cass_future_error_code(fut);
837 
838  if (rc != CASS_OK)
839  {
840  ++(requestParams.backend.counters_.readRetries);
841  JLOG(requestParams.backend.j_.warn())
842  << "Cassandra fetch error : " << rc << " : " << cass_error_desc(rc)
843  << " - retrying";
844  // Retry right away. The only time the cluster should ever be overloaded
845  // is when the very first ledger is being written in full (millions of
846  // writes at once), during which no reads should be occurring. If reads
847  // are timing out, the code/architecture should be modified to handle
848  // greater read load, as opposed to just exponential backoff
849  requestParams.backend.read(requestParams);
850  }
851  else
852  {
853  auto finish = [&requestParams]() {
854  size_t batchSize = requestParams.batchSize;
855  if (++(requestParams.numFinished) == batchSize)
856  requestParams.cv.notify_all();
857  };
858  CassResult const* res = cass_future_get_result(fut);
859 
860  CassRow const* row = cass_result_first_row(res);
861  if (!row)
862  {
863  cass_result_free(res);
864  JLOG(requestParams.backend.j_.error())
865  << "Cassandra fetch get row error : " << rc << ", "
866  << cass_error_desc(rc);
867  finish();
868  return;
869  }
870  cass_byte_t const* buf;
871  std::size_t bufSize;
872  rc = cass_value_get_bytes(cass_row_get_column(row, 0), &buf, &bufSize);
873  if (rc != CASS_OK)
874  {
875  cass_result_free(res);
876  JLOG(requestParams.backend.j_.error())
877  << "Cassandra fetch get bytes error : " << rc << ", "
878  << cass_error_desc(rc);
879  ++requestParams.backend.counters_.readErrors;
880  finish();
881  return;
882  }
883  nudb::detail::buffer bf;
885  nodeobject_decompress(buf, bufSize, bf);
886  DecodedBlob decoded(
887  requestParams.key, uncompressed.first, uncompressed.second);
888  cass_result_free(res);
889 
890  if (!decoded.wasOk())
891  {
892  JLOG(requestParams.backend.j_.fatal())
893  << "Cassandra fetch error - data corruption : " << rc << ", "
894  << cass_error_desc(rc);
895  ++requestParams.backend.counters_.readErrors;
896  finish();
897  return;
898  }
899  requestParams.result = decoded.createObject();
900  finish();
901  }
902 }
903 
904 // Process the result of an asynchronous write. Retry on error
905 // @param fut cassandra future associated with the write
906 // @param cbData struct that holds the request parameters
907 void
908 writeCallback(CassFuture* fut, void* cbData)
909 {
910  CassandraBackend::WriteCallbackData& requestParams =
911  *static_cast<CassandraBackend::WriteCallbackData*>(cbData);
912  CassandraBackend& backend = *requestParams.backend;
913  auto rc = cass_future_error_code(fut);
914  if (rc != CASS_OK)
915  {
916  JLOG(backend.j_.error())
917  << "ERROR!!! Cassandra insert error: " << rc << ", "
918  << cass_error_desc(rc) << ", retrying ";
919  ++requestParams.totalWriteRetries;
920  // exponential backoff with a max wait of 2^10 ms (about 1 second)
921  auto wait = std::chrono::milliseconds(
922  lround(std::pow(2, std::min(10u, requestParams.currentRetries))));
923  ++requestParams.currentRetries;
925  std::make_shared<boost::asio::steady_timer>(
926  backend.ioContext_, std::chrono::steady_clock::now() + wait);
927  timer->async_wait([timer, &requestParams, &backend](
928  const boost::system::error_code& error) {
929  backend.write(requestParams, true);
930  });
931  }
932  else
933  {
934  backend.counters_.writeDurationUs +=
935  std::chrono::duration_cast<std::chrono::microseconds>(
936  std::chrono::steady_clock::now() - requestParams.begin)
937  .count();
938  --(backend.numRequestsOutstanding_);
939 
940  backend.throttleCv_.notify_all();
941  if (backend.numRequestsOutstanding_ == 0)
942  backend.syncCv_.notify_all();
943  delete &requestParams;
944  }
945 }
946 
947 //------------------------------------------------------------------------------
948 
949 class CassandraFactory : public Factory
950 {
951 public:
952  CassandraFactory()
953  {
954  Manager::instance().insert(*this);
955  }
956 
957  ~CassandraFactory() override
958  {
959  Manager::instance().erase(*this);
960  }
961 
963  getName() const override
964  {
965  return "cassandra";
966  }
967 
969  createInstance(
970  size_t keyBytes,
971  Section const& keyValues,
972  std::size_t burstSize,
973  Scheduler& scheduler,
974  beast::Journal journal) override
975  {
976  return std::make_unique<CassandraBackend>(keyBytes, keyValues, journal);
977  }
978 };
979 
980 static CassandraFactory cassandraFactory;
981 
982 } // namespace NodeStore
983 } // namespace ripple
984 #endif
ripple::NodeStore::nodeobject_decompress
std::pair< void const *, std::size_t > nodeobject_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:100
sstream
std::this_thread::sleep_for
T sleep_for(T... args)
std::lock
T lock(T... args)
std::for_each
T for_each(T... args)
fstream
ripple::NodeStore::read
void read(nudb::detail::istream &is, std::size_t &u)
Definition: varint.h:120
std::string
STL class.
std::shared_ptr< NodeObject >
utility
exception
cstring
beast::Journal::trace
Stream trace() const
Severity stream access functions.
Definition: Journal.h:309
ripple::NodeStore::ok
@ ok
Definition: nodestore/Types.h:45
ripple::NodeStore::Manager::erase
virtual void erase(Factory &factory)=0
Remove a factory.
std::pair
std::vector::reserve
T reserve(T... args)
vector
std::string::size
T size(T... args)
std::chrono::seconds
std::optional::emplace
T emplace(T... args)
std::stringstream
STL class.
beast::Journal::warn
Stream warn() const
Definition: Journal.h:327
std::lock_guard
STL class.
std::function
ripple::NodeStore::backendError
@ backendError
Definition: nodestore/Types.h:49
queue
cmath
ripple::NodeStore::write
void write(nudb::detail::ostream &os, std::size_t t)
Definition: varint.h:133
std::optional::reset
T reset(T... args)
std::vector::push_back
T push_back(T... args)
ripple::NodeStore::notFound
@ notFound
Definition: nodestore/Types.h:46
std::cout
ripple::NodeStore::Batch
std::vector< std::shared_ptr< NodeObject > > Batch
A batch of NodeObjects to write at once.
Definition: nodestore/Types.h:55
ripple::NodeStore::Manager::insert
virtual void insert(Factory &factory)=0
Add a factory.
thread
std::thread::hardware_concurrency
T hardware_concurrency(T... args)
std::generic_category
T generic_category(T... args)
chrono
std::string::c_str
T c_str(T... args)
std::unique_lock
STL class.
beast::Journal::error
Stream error() const
Definition: Journal.h:333
cstdint
beast::Journal
A generic endpoint for log messages.
Definition: Journal.h:58
ripple::NodeStore::dataCorrupt
@ dataCorrupt
Definition: nodestore/Types.h:47
std::condition_variable::wait
T wait(T... args)
atomic
std::istreambuf_iterator
memory
std::min
T min(T... args)
ripple::NodeStore::Status
Status
Return codes from Backend operations.
Definition: nodestore/Types.h:44
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
std::lround
T lround(T... args)
std::endl
T endl(T... args)
std::begin
T begin(T... args)
cassert
std::condition_variable
std::string::empty
T empty(T... args)
std::optional
mutex
std::stringstream::str
T str(T... args)
std::size_t
ripple::NodeStore::nodeobject_compress
std::pair< void const *, std::size_t > nodeobject_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:211
ripple::NodeStore::Manager::instance
static Manager & instance()
Returns the instance of the manager singleton.
Definition: ManagerImp.cpp:120
std::unique_ptr
STL class.
std::data
T data(T... args)
cstdio
std::thread::join
T join(T... args)
ripple::get
T & get(EitherAmount &amt)
Definition: AmountSpec.h:118
ripple::open
void open(soci::session &s, BasicConfig const &config, std::string const &dbName)
Open a soci session.
Definition: SociDB.cpp:98
std::ifstream
STL class.
std::pow
T pow(T... args)
std::chrono::steady_clock::now
T now(T... args)